START

Goal/Purpose of operations: 
PCA analysis of the new cancers (lung, pancreas, liver) to evaluate and determine any covariates. Pancreatic cancer had a less common subtype driving PC1. However, accounting for it did not resolve the issue that, whereas in most cancers the tumor and normal samples split along PC1, PC2, or PC3, they do not here. I checked 10 PCs and double-checked the sample labels. This is due to low tumor purity in pancreatic cancers.
Liver cancer shows a split between tumor and normal. Liver and lung seem to be influenced by the time between tissue removal and RNA prep (also seen in the RIN score). There is no good or easy way to use this as a covariate because the samples will vary between GTEx and TCGA, but it is important to note here and in other places to highlight a limitation.

Finished pseudocode on:
220524

System which operations were done on:
my laptop

GitHub Repo:
Transfer_Learning_R03

Docker:
rstudio_cancer_dr

Directory of operations: 
/home

Scripts being edited for operations:
NA

Data being used: 
Recount3

Papers and tools:
DESeq2
prcomp

STEPS

Set working directory

load in data

library(recount3)
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
library(SummarizedExperiment)
library(stringr)

Analysis

PAAD

# Load the TCGA PAAD sample metadata and count matrix from the locally cached
# recount3 RDS files.
paad_tcga_metadata <- readRDS("~/data/recount3/recount3_fix_download/paad_tcga_metadata.rds")
paad_tcga_counts <- readRDS("~/data/recount3/recount3_fix_download/paad_tcga_counts.rds")

check counts and metadata info

# Sanity-check dimensions: the counts have 183 columns and the metadata has
# 183 rows, i.e. one entry per sample in each object.
dim(paad_tcga_counts)
## [1] 63856   183
dim(paad_tcga_metadata)
## [1] 183 840
# Length of the first count column name (37 characters); presumably this
# informs the 36-character suffix kept when the ids are extracted below.
nchar(colnames(paad_tcga_counts)[1])
## [1] 37
#https://stackoverflow.com/questions/7963898/extracting-the-last-n-characters-from-a-string-in-r
substrRight <- function(x, n) {
  # Return the trailing `n` characters of every element of `x`.
  # Elements shorter than `n` come back unchanged because substr() clamps a
  # start position below 1.
  width <- nchar(x)
  substr(x, start = width - n + 1, stop = width)
}
# Derive sample ids from the count column names: keep the trailing 36
# characters and turn every punctuation character into "-" so they can be
# compared with the metadata external_id values.
ids <- str_replace_all(
  string = substrRight(colnames(paad_tcga_counts), 36),
  pattern = "[[:punct:]]",
  replacement = "-"
)
colnames(paad_tcga_counts) <- ids

# Sort the metadata rows and the count columns by their respective ids, then
# print whether the two orderings agree.
paad_tcga_metadata_order <- paad_tcga_metadata[order(paad_tcga_metadata$external_id), ]
paad_tcga_counts_order <- paad_tcga_counts[, order(ids)]
identical(
  colnames(paad_tcga_counts_order),
  paad_tcga_metadata_order$external_id
)
## [1] TRUE
# Relabel the count columns with TCGA barcodes. This is a positional
# assignment, so it is only valid when the sorted counts and sorted metadata
# list the same samples in the same order; stop instead of silently attaching
# barcodes to the wrong columns.
stopifnot(
  identical(
    colnames(paad_tcga_counts_order),
    paad_tcga_metadata_order$external_id
  )
)
colnames(paad_tcga_counts_order) <- paad_tcga_metadata_order$tcga_barcode

# Persist the aligned counts and metadata for later steps.
saveRDS(paad_tcga_counts_order, "/home/rstudio/data/paad_tcga_count_ordered.rds")
saveRDS(paad_tcga_metadata_order, "/home/rstudio/data/paad_tcga_metadata_ordered.rds")

import the pancreas data

# Load the GTEx pancreas count matrix and sample metadata from the locally
# cached recount3 RDS files.
pan_gtex_counts <- readRDS("~/data/recount3/recount3_fix_download/pan_gtex_counts.rds")
pan_gtex_metadata <- readRDS("~/data/recount3/recount3_fix_download/pan_gtex_metadata.rds")
# Peek at the first five ids on each side. The count column names use "."
# as separator while the metadata external_id uses "-", and the two objects
# are not in the same sample order.
colnames(pan_gtex_counts)[1:5]
## [1] "GTEX.WFG7.0426.SM.4LMK5.1"  "GTEX.1211K.1126.SM.5EGGB.1"
## [3] "GTEX.13FTX.1226.SM.5IFGN.1" "GTEX.145MN.1426.SM.5SI9H.1"
## [5] "GTEX.13O61.2126.SM.5IJEO.1"
pan_gtex_metadata$external_id[1:5]
## [1] "GTEX-111CU-0526-SM-5EGHK.1" "GTEX-111YS-1226-SM-5EGGJ.1"
## [3] "GTEX-1122O-0726-SM-5GIEV.1" "GTEX-1128S-0826-SM-5GZZI.1"
## [5] "GTEX-117YX-0226-SM-5EGH6.1"
# Normalise both id sets by replacing every punctuation character with "-",
# so that the "."-separated count column names and the "-"-separated metadata
# ids become directly comparable.
meta_data_ids <- str_replace_all(
  string = pan_gtex_metadata$external_id,
  pattern = "[[:punct:]]",
  replacement = "-"
)
ids <- str_replace_all(
  string = colnames(pan_gtex_counts),
  pattern = "[[:punct:]]",
  replacement = "-"
)
# Print whether the two sorted id vectors are identical.
identical(ids[order(ids)], meta_data_ids[order(meta_data_ids)])
## [1] TRUE
# The relabelling below is positional, so it is only valid when the
# normalised count ids and metadata ids contain exactly the same samples.
# Stop here instead of silently attaching the wrong external_id to a column.
stopifnot(
  identical(ids[order(ids)], meta_data_ids[order(meta_data_ids)])
)

# Sort count columns and metadata rows by their normalised ids so that both
# objects list the samples in the same order.
pan_gtex_counts_order <- pan_gtex_counts[, order(ids)]
pan_gtex_metadata_order <- pan_gtex_metadata[order(meta_data_ids), ]

# Use the metadata external_id values as the count column names.
colnames(pan_gtex_counts_order) <- pan_gtex_metadata_order$external_id

# Persist the aligned counts (as a data frame) and metadata for later steps.
recount3_count_pan <- as.data.frame(pan_gtex_counts_order)
saveRDS(recount3_count_pan, "/home/rstudio/data/pan_gtex_count_ordered.rds")
saveRDS(pan_gtex_metadata_order, "/home/rstudio/data/pan_gtex_metadata_ordered.rds")
library(DESeq2)
#colData(recount3_rse_PANCREAS)
# Variance-stabilise the GTEx pancreas counts; the data frame is converted
# back to a matrix before being handed to vst().
vst_table <- vst(object = as.matrix(recount3_count_pan), blind = TRUE)

# Transpose so that samples are rows, then run a centred, unscaled PCA
# (the prcomp() defaults, spelled out).
vst_table_df <- t(vst_table)
pca.tumor <- prcomp(x = vst_table_df, center = TRUE, scale. = FALSE)
summary(pca.tumor)
## Importance of components:
##                            PC1      PC2      PC3      PC4      PC5      PC6
## Standard deviation     30.0452 24.42454 22.20122 17.48497 15.98831 15.41468
## Proportion of Variance  0.1084  0.07166  0.05921  0.03673  0.03071  0.02854
## Cumulative Proportion   0.1084  0.18010  0.23931  0.27604  0.30675  0.33529
##                             PC7     PC8      PC9     PC10     PC11     PC12
## Standard deviation     13.82431 13.2224 12.49573 11.92589 11.18827 10.42483
## Proportion of Variance  0.02296  0.0210  0.01876  0.01709  0.01504  0.01306
## Cumulative Proportion   0.35825  0.3792  0.39801  0.41509  0.43013  0.44319
##                            PC13    PC14    PC15    PC16   PC17    PC18    PC19
## Standard deviation     10.29831 9.53619 9.19273 8.93092 8.4630 8.39932 8.27100
## Proportion of Variance  0.01274 0.01092 0.01015 0.00958 0.0086 0.00847 0.00822
## Cumulative Proportion   0.45593 0.46685 0.47700 0.48658 0.4952 0.50366 0.51188
##                           PC20    PC21    PC22    PC23    PC24    PC25    PC26
## Standard deviation     8.17212 7.80879 7.46373 7.39137 7.25817 6.91705 6.87114
## Proportion of Variance 0.00802 0.00733 0.00669 0.00656 0.00633 0.00575 0.00567
## Cumulative Proportion  0.51990 0.52723 0.53392 0.54048 0.54681 0.55256 0.55823
##                           PC27    PC28    PC29    PC30    PC31    PC32    PC33
## Standard deviation     6.68168 6.60970 6.36138 6.30886 6.09662 5.90330 5.86881
## Proportion of Variance 0.00536 0.00525 0.00486 0.00478 0.00446 0.00419 0.00414
## Cumulative Proportion  0.56359 0.56884 0.57370 0.57848 0.58295 0.58713 0.59127
##                           PC34    PC35    PC36   PC37    PC38    PC39    PC40
## Standard deviation     5.74068 5.67335 5.64255 5.5463 5.48430 5.38641 5.31574
## Proportion of Variance 0.00396 0.00387 0.00382 0.0037 0.00361 0.00349 0.00339
## Cumulative Proportion  0.59523 0.59910 0.60292 0.6066 0.61023 0.61372 0.61711
##                          PC41    PC42    PC43    PC44    PC45    PC46    PC47
## Standard deviation     5.2437 5.21458 5.17741 5.14322 5.13541 5.04757 4.98279
## Proportion of Variance 0.0033 0.00327 0.00322 0.00318 0.00317 0.00306 0.00298
## Cumulative Proportion  0.6204 0.62368 0.62690 0.63008 0.63324 0.63631 0.63929
##                           PC48    PC49   PC50    PC51    PC52    PC53    PC54
## Standard deviation     4.95204 4.88953 4.8310 4.79131 4.76808 4.73406 4.65916
## Proportion of Variance 0.00295 0.00287 0.0028 0.00276 0.00273 0.00269 0.00261
## Cumulative Proportion  0.64223 0.64511 0.6479 0.65067 0.65340 0.65609 0.65870
##                          PC55    PC56    PC57    PC58    PC59    PC60    PC61
## Standard deviation     4.6532 4.61455 4.58852 4.55207 4.53557 4.50429 4.43300
## Proportion of Variance 0.0026 0.00256 0.00253 0.00249 0.00247 0.00244 0.00236
## Cumulative Proportion  0.6613 0.66386 0.66639 0.66888 0.67135 0.67378 0.67614
##                           PC62   PC63    PC64    PC65    PC66   PC67    PC68
## Standard deviation     4.39124 4.3779 4.34156 4.32945 4.29285 4.2801 4.21635
## Proportion of Variance 0.00232 0.0023 0.00226 0.00225 0.00221 0.0022 0.00214
## Cumulative Proportion  0.67846 0.6808 0.68303 0.68528 0.68749 0.6897 0.69183
##                           PC69    PC70    PC71    PC72    PC73    PC74   PC75
## Standard deviation     4.20065 4.19632 4.16015 4.11565 4.10750 4.09884 4.0757
## Proportion of Variance 0.00212 0.00212 0.00208 0.00203 0.00203 0.00202 0.0020
## Cumulative Proportion  0.69395 0.69606 0.69814 0.70018 0.70220 0.70422 0.7062
##                           PC76    PC77    PC78    PC79    PC80    PC81    PC82
## Standard deviation     4.06148 4.03849 4.01996 3.99225 3.95753 3.93899 3.92053
## Proportion of Variance 0.00198 0.00196 0.00194 0.00191 0.00188 0.00186 0.00185
## Cumulative Proportion  0.70820 0.71016 0.71210 0.71402 0.71590 0.71776 0.71961
##                           PC83   PC84   PC85    PC86    PC87    PC88    PC89
## Standard deviation     3.89028 3.8750 3.8697 3.85486 3.85328 3.83204 3.81567
## Proportion of Variance 0.00182 0.0018 0.0018 0.00179 0.00178 0.00176 0.00175
## Cumulative Proportion  0.72142 0.7232 0.7250 0.72681 0.72860 0.73036 0.73211
##                           PC90    PC91   PC92   PC93    PC94    PC95    PC96
## Standard deviation     3.78876 3.78517 3.7606 3.7569 3.72901 3.71584 3.70743
## Proportion of Variance 0.00172 0.00172 0.0017 0.0017 0.00167 0.00166 0.00165
## Cumulative Proportion  0.73383 0.73555 0.7372 0.7389 0.74062 0.74228 0.74393
##                           PC97    PC98    PC99   PC100   PC101   PC102   PC103
## Standard deviation     3.70552 3.67585 3.66773 3.66004 3.64324 3.63664 3.61695
## Proportion of Variance 0.00165 0.00162 0.00162 0.00161 0.00159 0.00159 0.00157
## Cumulative Proportion  0.74558 0.74720 0.74882 0.75043 0.75202 0.75361 0.75518
##                          PC104   PC105   PC106   PC107   PC108  PC109   PC110
## Standard deviation     3.59102 3.58566 3.56702 3.55408 3.54025 3.5280 3.52453
## Proportion of Variance 0.00155 0.00154 0.00153 0.00152 0.00151 0.0015 0.00149
## Cumulative Proportion  0.75673 0.75828 0.75980 0.76132 0.76283 0.7643 0.76581
##                          PC111   PC112   PC113   PC114   PC115   PC116   PC117
## Standard deviation     3.50914 3.49500 3.47517 3.46063 3.45819 3.44449 3.44035
## Proportion of Variance 0.00148 0.00147 0.00145 0.00144 0.00144 0.00143 0.00142
## Cumulative Proportion  0.76729 0.76876 0.77021 0.77165 0.77309 0.77451 0.77593
##                          PC118   PC119  PC120   PC121   PC122   PC123   PC124
## Standard deviation     3.43398 3.42124 3.4163 3.40766 3.40026 3.39654 3.37905
## Proportion of Variance 0.00142 0.00141 0.0014 0.00139 0.00139 0.00139 0.00137
## Cumulative Proportion  0.77735 0.77876 0.7802 0.78155 0.78294 0.78433 0.78570
##                          PC125   PC126   PC127   PC128   PC129   PC130   PC131
## Standard deviation     3.37470 3.36001 3.35779 3.34735 3.34317 3.33246 3.32917
## Proportion of Variance 0.00137 0.00136 0.00135 0.00135 0.00134 0.00133 0.00133
## Cumulative Proportion  0.78707 0.78842 0.78978 0.79112 0.79247 0.79380 0.79513
##                          PC132   PC133  PC134  PC135  PC136   PC137   PC138
## Standard deviation     3.31455 3.30845 3.2928 3.2904 3.2836 3.27397 3.26140
## Proportion of Variance 0.00132 0.00131 0.0013 0.0013 0.0013 0.00129 0.00128
## Cumulative Proportion  0.79645 0.79777 0.7991 0.8004 0.8017 0.80295 0.80423
##                          PC139   PC140   PC141   PC142   PC143   PC144   PC145
## Standard deviation     3.25339 3.25126 3.24309 3.23593 3.23245 3.21763 3.21064
## Proportion of Variance 0.00127 0.00127 0.00126 0.00126 0.00126 0.00124 0.00124
## Cumulative Proportion  0.80550 0.80677 0.80804 0.80929 0.81055 0.81179 0.81303
##                          PC146   PC147   PC148   PC149   PC150   PC151  PC152
## Standard deviation     3.20309 3.19907 3.18999 3.18243 3.18105 3.16820 3.1634
## Proportion of Variance 0.00123 0.00123 0.00122 0.00122 0.00122 0.00121 0.0012
## Cumulative Proportion  0.81426 0.81549 0.81672 0.81793 0.81915 0.82035 0.8216
##                         PC153  PC154   PC155   PC156   PC157   PC158   PC159
## Standard deviation     3.1606 3.1558 3.14554 3.14009 3.13754 3.13344 3.12557
## Proportion of Variance 0.0012 0.0012 0.00119 0.00118 0.00118 0.00118 0.00117
## Cumulative Proportion  0.8228 0.8239 0.82514 0.82632 0.82751 0.82869 0.82986
##                          PC160   PC161   PC162   PC163   PC164   PC165   PC166
## Standard deviation     3.11755 3.11395 3.10581 3.09356 3.08938 3.08305 3.07873
## Proportion of Variance 0.00117 0.00116 0.00116 0.00115 0.00115 0.00114 0.00114
## Cumulative Proportion  0.83103 0.83219 0.83335 0.83450 0.83565 0.83679 0.83793
##                          PC167   PC168   PC169   PC170   PC171   PC172   PC173
## Standard deviation     3.07462 3.06430 3.06022 3.05670 3.05033 3.04684 3.04265
## Proportion of Variance 0.00114 0.00113 0.00112 0.00112 0.00112 0.00112 0.00111
## Cumulative Proportion  0.83906 0.84019 0.84132 0.84244 0.84356 0.84467 0.84578
##                          PC174  PC175  PC176   PC177   PC178   PC179   PC180
## Standard deviation     3.03959 3.0319 3.0270 3.01503 3.01049 3.00594 2.99494
## Proportion of Variance 0.00111 0.0011 0.0011 0.00109 0.00109 0.00109 0.00108
## Cumulative Proportion  0.84689 0.8480 0.8491 0.85019 0.85128 0.85237 0.85344
##                          PC181   PC182   PC183   PC184   PC185   PC186   PC187
## Standard deviation     2.99439 2.99037 2.98379 2.97946 2.97711 2.97043 2.96251
## Proportion of Variance 0.00108 0.00107 0.00107 0.00107 0.00106 0.00106 0.00105
## Cumulative Proportion  0.85452 0.85559 0.85666 0.85773 0.85879 0.85985 0.86091
##                          PC188   PC189   PC190   PC191   PC192   PC193   PC194
## Standard deviation     2.95961 2.95624 2.95141 2.94277 2.94109 2.93892 2.92771
## Proportion of Variance 0.00105 0.00105 0.00105 0.00104 0.00104 0.00104 0.00103
## Cumulative Proportion  0.86196 0.86301 0.86406 0.86510 0.86614 0.86717 0.86820
##                          PC195   PC196   PC197   PC198   PC199   PC200   PC201
## Standard deviation     2.92429 2.92160 2.91835 2.91460 2.90932 2.90439 2.90132
## Proportion of Variance 0.00103 0.00103 0.00102 0.00102 0.00102 0.00101 0.00101
## Cumulative Proportion  0.86923 0.87026 0.87128 0.87230 0.87332 0.87433 0.87534
##                          PC202   PC203  PC204  PC205   PC206   PC207   PC208
## Standard deviation     2.89606 2.89341 2.8871 2.8840 2.87691 2.87161 2.86633
## Proportion of Variance 0.00101 0.00101 0.0010 0.0010 0.00099 0.00099 0.00099
## Cumulative Proportion  0.87635 0.87735 0.8784 0.8794 0.88035 0.88134 0.88233
##                          PC209   PC210   PC211   PC212   PC213   PC214   PC215
## Standard deviation     2.86406 2.85968 2.85100 2.84725 2.84085 2.83965 2.83553
## Proportion of Variance 0.00099 0.00098 0.00098 0.00097 0.00097 0.00097 0.00097
## Cumulative Proportion  0.88331 0.88429 0.88527 0.88624 0.88721 0.88818 0.88915
##                          PC216   PC217   PC218   PC219   PC220   PC221   PC222
## Standard deviation     2.83486 2.82591 2.82280 2.82101 2.81837 2.81104 2.80575
## Proportion of Variance 0.00097 0.00096 0.00096 0.00096 0.00095 0.00095 0.00095
## Cumulative Proportion  0.89011 0.89107 0.89203 0.89299 0.89394 0.89489 0.89584
##                          PC223   PC224   PC225   PC226   PC227   PC228   PC229
## Standard deviation     2.80220 2.80051 2.79801 2.79246 2.79073 2.78776 2.78263
## Proportion of Variance 0.00094 0.00094 0.00094 0.00094 0.00094 0.00093 0.00093
## Cumulative Proportion  0.89678 0.89772 0.89866 0.89960 0.90053 0.90147 0.90240
##                          PC230   PC231   PC232   PC233   PC234   PC235   PC236
## Standard deviation     2.77758 2.77580 2.77156 2.76483 2.76372 2.75992 2.75661
## Proportion of Variance 0.00093 0.00093 0.00092 0.00092 0.00092 0.00092 0.00091
## Cumulative Proportion  0.90332 0.90425 0.90517 0.90609 0.90701 0.90792 0.90884
##                          PC237   PC238   PC239  PC240  PC241  PC242   PC243
## Standard deviation     2.75409 2.75153 2.74683 2.7435 2.7381 2.7352 2.72938
## Proportion of Variance 0.00091 0.00091 0.00091 0.0009 0.0009 0.0009 0.00089
## Cumulative Proportion  0.90975 0.91066 0.91156 0.9125 0.9134 0.9143 0.91516
##                          PC244   PC245   PC246   PC247   PC248   PC249   PC250
## Standard deviation     2.72086 2.71925 2.71426 2.71126 2.70440 2.70197 2.69713
## Proportion of Variance 0.00089 0.00089 0.00089 0.00088 0.00088 0.00088 0.00087
## Cumulative Proportion  0.91605 0.91694 0.91782 0.91871 0.91959 0.92046 0.92134
##                          PC251   PC252   PC253   PC254   PC255   PC256   PC257
## Standard deviation     2.69472 2.69004 2.68899 2.68627 2.68273 2.67442 2.67177
## Proportion of Variance 0.00087 0.00087 0.00087 0.00087 0.00086 0.00086 0.00086
## Cumulative Proportion  0.92221 0.92308 0.92395 0.92481 0.92568 0.92654 0.92740
##                          PC258   PC259   PC260   PC261   PC262   PC263   PC264
## Standard deviation     2.66873 2.66397 2.66311 2.65899 2.65795 2.65231 2.64553
## Proportion of Variance 0.00086 0.00085 0.00085 0.00085 0.00085 0.00085 0.00084
## Cumulative Proportion  0.92825 0.92910 0.92996 0.93080 0.93165 0.93250 0.93334
##                          PC265   PC266   PC267   PC268   PC269   PC270   PC271
## Standard deviation     2.64220 2.64058 2.63780 2.62962 2.62802 2.62607 2.62027
## Proportion of Variance 0.00084 0.00084 0.00084 0.00083 0.00083 0.00083 0.00082
## Cumulative Proportion  0.93418 0.93502 0.93585 0.93668 0.93751 0.93834 0.93916
##                          PC272   PC273   PC274   PC275   PC276   PC277   PC278
## Standard deviation     2.61739 2.61076 2.60683 2.60438 2.60217 2.60122 2.59551
## Proportion of Variance 0.00082 0.00082 0.00082 0.00081 0.00081 0.00081 0.00081
## Cumulative Proportion  0.93999 0.94081 0.94162 0.94244 0.94325 0.94406 0.94487
##                          PC279  PC280  PC281  PC282   PC283   PC284   PC285
## Standard deviation     2.59037 2.5859 2.5807 2.5793 2.57187 2.56755 2.56524
## Proportion of Variance 0.00081 0.0008 0.0008 0.0008 0.00079 0.00079 0.00079
## Cumulative Proportion  0.94568 0.9465 0.9473 0.9481 0.94888 0.94967 0.95046
##                          PC286   PC287   PC288   PC289   PC290   PC291   PC292
## Standard deviation     2.56359 2.55918 2.55455 2.55357 2.55222 2.54862 2.54228
## Proportion of Variance 0.00079 0.00079 0.00078 0.00078 0.00078 0.00078 0.00078
## Cumulative Proportion  0.95125 0.95204 0.95282 0.95360 0.95438 0.95517 0.95594
##                          PC293   PC294   PC295   PC296   PC297   PC298   PC299
## Standard deviation     2.53621 2.53426 2.52963 2.52623 2.52338 2.51839 2.51624
## Proportion of Variance 0.00077 0.00077 0.00077 0.00077 0.00076 0.00076 0.00076
## Cumulative Proportion  0.95671 0.95749 0.95825 0.95902 0.95979 0.96055 0.96131
##                          PC300   PC301   PC302   PC303   PC304   PC305   PC306
## Standard deviation     2.50972 2.50680 2.50170 2.49927 2.49726 2.49308 2.49225
## Proportion of Variance 0.00076 0.00075 0.00075 0.00075 0.00075 0.00075 0.00075
## Cumulative Proportion  0.96207 0.96282 0.96357 0.96432 0.96507 0.96582 0.96656
##                          PC307   PC308   PC309   PC310   PC311   PC312   PC313
## Standard deviation     2.47994 2.47630 2.47078 2.46630 2.45976 2.45874 2.45698
## Proportion of Variance 0.00074 0.00074 0.00073 0.00073 0.00073 0.00073 0.00073
## Cumulative Proportion  0.96730 0.96804 0.96877 0.96950 0.97023 0.97096 0.97168
##                          PC314   PC315   PC316   PC317   PC318   PC319  PC320
## Standard deviation     2.45028 2.44527 2.44052 2.43661 2.42623 2.42522 2.4223
## Proportion of Variance 0.00072 0.00072 0.00072 0.00071 0.00071 0.00071 0.0007
## Cumulative Proportion  0.97240 0.97312 0.97384 0.97455 0.97526 0.97596 0.9767
##                         PC321  PC322  PC323   PC324   PC325   PC326   PC327
## Standard deviation     2.4167 2.4080 2.4075 2.39650 2.39152 2.39005 2.38893
## Proportion of Variance 0.0007 0.0007 0.0007 0.00069 0.00069 0.00069 0.00069
## Cumulative Proportion  0.9774 0.9781 0.9788 0.97945 0.98014 0.98083 0.98151
##                          PC328   PC329   PC330   PC331   PC332   PC333   PC334
## Standard deviation     2.38257 2.37905 2.37524 2.37151 2.36535 2.35834 2.35584
## Proportion of Variance 0.00068 0.00068 0.00068 0.00068 0.00067 0.00067 0.00067
## Cumulative Proportion  0.98219 0.98287 0.98355 0.98423 0.98490 0.98557 0.98623
##                          PC335   PC336   PC337   PC338   PC339   PC340   PC341
## Standard deviation     2.34905 2.34702 2.34303 2.33635 2.32320 2.31386 2.31037
## Proportion of Variance 0.00066 0.00066 0.00066 0.00066 0.00065 0.00064 0.00064
## Cumulative Proportion  0.98690 0.98756 0.98822 0.98887 0.98952 0.99016 0.99081
##                          PC342   PC343   PC344   PC345   PC346   PC347  PC348
## Standard deviation     2.29992 2.29480 2.28931 2.28136 2.26866 2.24575 2.2294
## Proportion of Variance 0.00064 0.00063 0.00063 0.00063 0.00062 0.00061 0.0006
## Cumulative Proportion  0.99144 0.99207 0.99270 0.99333 0.99395 0.99455 0.9951
##                          PC349   PC350   PC351   PC352   PC353   PC354   PC355
## Standard deviation     2.21130 2.19308 2.16722 2.16321 2.12501 2.10110 2.09759
## Proportion of Variance 0.00059 0.00058 0.00056 0.00056 0.00054 0.00053 0.00053
## Cumulative Proportion  0.99574 0.99632 0.99688 0.99744 0.99798 0.99851 0.99904
##                         PC356   PC357     PC358     PC359     PC360
## Standard deviation     2.0404 1.95019 1.459e-13 1.334e-14 5.966e-16
## Proportion of Variance 0.0005 0.00046 0.000e+00 0.000e+00 0.000e+00
## Cumulative Proportion  0.9995 1.00000 1.000e+00 1.000e+00 1.000e+00
# Colour the GTEx pancreas PC1/PC2 scatter by donor covariates. Each section
# collects the external_ids that match one covariate value, drops the NA ids
# produced by missing covariate values, and draws matching samples in red.
# cex.axis / cex.lab are given as numbers rather than the strings "1.5".

# Sex: samples with SEX == "2" (labelled "female" in the legend).
sex <- pan_gtex_metadata_order$external_id[pan_gtex_metadata_order$SEX == "2"]
sex <- sex[!is.na(sex)]
tumor_norm <- ifelse(pan_gtex_metadata_order$external_id %in% sex, "red", "black")
plot(
  pca.tumor$x[, 1], pca.tumor$x[, 2],
  pch = 20, col = tumor_norm,
  main = "PCA of Gtex pancreas",
  xlab = "PC1 (10.84%)", ylab = "PC2 (7.17%)",
  cex.axis = 1.5, cex.lab = 1.5
)
legend(
  "topleft",
  legend = c("female", "male"),
  pch = 21, pt.bg = c("red", "black"), col = "black"
)

# Age bracket 70-79 versus everyone else.
age <- pan_gtex_metadata_order$external_id[pan_gtex_metadata_order$AGE == "70-79"]
age <- age[!is.na(age)]

tumor_norm <- ifelse(pan_gtex_metadata_order$external_id %in% age, "red", "black")
plot(
  pca.tumor$x[, 1], pca.tumor$x[, 2],
  pch = 20, col = tumor_norm,
  main = "PCA of Gtex pancreas",
  xlab = "PC1 (10.84%)", ylab = "PC2 (7.17%)",
  cex.axis = 1.5, cex.lab = 1.5
)
legend(
  "topleft",
  legend = c("70-79", "! 70-79"),
  pch = 21, pt.bg = c("red", "black"), col = "black"
)

# Age bracket 20-29 versus everyone else.
age <- pan_gtex_metadata_order$external_id[pan_gtex_metadata_order$AGE == "20-29"]
age <- age[!is.na(age)]
#normal_ids<-  rownames(recount3_rse_PANCREAS@colData)[rownames(recount3_rse_PANCREAS@colData) %in% sex]

tumor_norm <- ifelse(pan_gtex_metadata_order$external_id %in% age, "red", "black")
plot(
  pca.tumor$x[, 1], pca.tumor$x[, 2],
  pch = 20, col = tumor_norm,
  main = "PCA of Gtex pancreas",
  xlab = "PC1 (10.84%)", ylab = "PC2 (7.17%)",
  cex.axis = 1.5, cex.lab = 1.5
)
legend(
  "topleft",
  legend = c("20-29", "! 20-29"),
  pch = 21, pt.bg = c("red", "black"), col = "black"
)

looking at RIN scores

# Highlight samples whose SMRIN value is at least 7 (red) against the rest
# (black). Samples with a missing SMRIN yield NA ids, which are dropped and
# therefore drawn in black.
RIN <- pan_gtex_metadata_order$external_id[pan_gtex_metadata_order$SMRIN >= 7]
RIN <- RIN[!is.na(RIN)]

tumor_norm <- ifelse(pan_gtex_metadata_order$external_id %in% RIN, "red", "black")
# cex.axis / cex.lab are given as numbers rather than the strings "1.5".
plot(
  pca.tumor$x[, 1], pca.tumor$x[, 2],
  pch = 20, col = tumor_norm,
  main = "PCA of Gtex pancreas",
  xlab = "PC1 (10.84%)", ylab = "PC2 (7.17%)",
  cex.axis = 1.5, cex.lab = 1.5
)
legend(
  "topleft",
  legend = c(">= 7", "< 7"),
  pch = 21, pt.bg = c("red", "black"), col = "black"
)

SMTSISCH indicates the ischemic time in minutes

# Highlight samples whose SMTSISCH value is at least 500 (red) against the
# rest (black). Samples with a missing SMTSISCH yield NA ids, which are
# dropped and therefore drawn in black.
time <- pan_gtex_metadata_order$external_id[pan_gtex_metadata_order$SMTSISCH >= 500]
time <- time[!is.na(time)]

tumor_norm <- ifelse(pan_gtex_metadata_order$external_id %in% time, "red", "black")
# cex.axis / cex.lab are given as numbers rather than the strings "1.5".
plot(
  pca.tumor$x[, 1], pca.tumor$x[, 2],
  pch = 20, col = tumor_norm,
  main = "PCA of Gtex pancreas",
  xlab = "PC1 (10.84%)", ylab = "PC2 (7.17%)",
  cex.axis = 1.5, cex.lab = 1.5
)
legend(
  "topleft",
  legend = c(">= 500", "< 500"),
  pch = 21, pt.bg = c("red", "black"), col = "black"
)

PAAD

# Variance-stabilise the ordered TCGA PAAD counts (columns are labelled with
# TCGA barcodes at this point).
vst_table <- vst(object = as.matrix(paad_tcga_counts_order), blind = TRUE)

# Transpose so that samples are rows, then run a centred, unscaled PCA
# (the prcomp() defaults, spelled out). This overwrites the GTEx PCA object.
vst_table_df <- t(vst_table)
pca.tumor <- prcomp(x = vst_table_df, center = TRUE, scale. = FALSE)
summary(pca.tumor)
## Importance of components:
##                           PC1     PC2      PC3      PC4      PC5      PC6
## Standard deviation     63.542 58.4378 45.33777 40.66603 35.36355 33.69370
## Proportion of Variance  0.127  0.1074  0.06466  0.05202  0.03934  0.03571
## Cumulative Proportion   0.127  0.2344  0.29912  0.35114  0.39048  0.42620
##                             PC7     PC8      PC9     PC10     PC11     PC12
## Standard deviation     29.60559 25.2759 22.43073 21.44785 20.74062 19.41519
## Proportion of Variance  0.02757  0.0201  0.01583  0.01447  0.01353  0.01186
## Cumulative Proportion   0.45377  0.4739  0.48970  0.50417  0.51770  0.52956
##                            PC13     PC14     PC15     PC16    PC17     PC18
## Standard deviation     19.17076 18.17359 18.06052 17.78557 17.1918 16.27709
## Proportion of Variance  0.01156  0.01039  0.01026  0.00995  0.0093  0.00833
## Cumulative Proportion   0.54112  0.55151  0.56177  0.57172  0.5810  0.58936
##                            PC19     PC20     PC21     PC22     PC23     PC24
## Standard deviation     16.01044 15.87386 15.59041 15.34758 14.77060 14.60923
## Proportion of Variance  0.00806  0.00793  0.00765  0.00741  0.00686  0.00671
## Cumulative Proportion   0.59742  0.60535  0.61299  0.62040  0.62727  0.63398
##                            PC25     PC26    PC27     PC28     PC29     PC30
## Standard deviation     14.46102 14.19910 13.9263 13.76954 13.48942 13.20125
## Proportion of Variance  0.00658  0.00634  0.0061  0.00596  0.00572  0.00548
## Cumulative Proportion   0.64056  0.64690  0.6530  0.65897  0.66469  0.67018
##                            PC31     PC32     PC33     PC34     PC35    PC36
## Standard deviation     13.03449 12.83102 12.72018 12.67710 12.43568 12.2243
## Proportion of Variance  0.00534  0.00518  0.00509  0.00506  0.00487  0.0047
## Cumulative Proportion   0.67552  0.68070  0.68579  0.69085  0.69571  0.7004
##                           PC37     PC38     PC39     PC40     PC41     PC42
## Standard deviation     11.9649 11.73132 11.57543 11.48083 11.46756 11.40348
## Proportion of Variance  0.0045  0.00433  0.00422  0.00415  0.00414  0.00409
## Cumulative Proportion   0.7049  0.70924  0.71346  0.71761  0.72174  0.72583
##                            PC43     PC44    PC45    PC46     PC47    PC48
## Standard deviation     11.23405 11.20859 11.1385 10.8415 10.82423 10.6965
## Proportion of Variance  0.00397  0.00395  0.0039  0.0037  0.00369  0.0036
## Cumulative Proportion   0.72980  0.73376  0.7377  0.7414  0.74504  0.7486
##                            PC49     PC50     PC51     PC52     PC53     PC54
## Standard deviation     10.66931 10.51512 10.43732 10.37509 10.29633 10.20617
## Proportion of Variance  0.00358  0.00348  0.00343  0.00339  0.00334  0.00328
## Cumulative Proportion   0.75222  0.75570  0.75913  0.76252  0.76585  0.76913
##                            PC55    PC56    PC57    PC58    PC59    PC60    PC61
## Standard deviation     10.14409 10.0894 9.95004 9.93803 9.81732 9.74572 9.71099
## Proportion of Variance  0.00324  0.0032 0.00311 0.00311 0.00303 0.00299 0.00297
## Cumulative Proportion   0.77237  0.7756 0.77868 0.78179 0.78482 0.78781 0.79078
##                           PC62    PC63    PC64    PC65    PC66    PC67   PC68
## Standard deviation     9.66454 9.62211 9.55169 9.46667 9.40055 9.39645 9.2606
## Proportion of Variance 0.00294 0.00291 0.00287 0.00282 0.00278 0.00278 0.0027
## Cumulative Proportion  0.79371 0.79663 0.79950 0.80232 0.80510 0.80787 0.8106
##                           PC69    PC70   PC71    PC72    PC73   PC74    PC75
## Standard deviation     9.15686 9.12400 9.0973 9.02846 8.93559 8.9082 8.87756
## Proportion of Variance 0.00264 0.00262 0.0026 0.00256 0.00251 0.0025 0.00248
## Cumulative Proportion  0.81321 0.81583 0.8184 0.82100 0.82351 0.8260 0.82848
##                           PC76    PC77    PC78   PC79    PC80    PC81    PC82
## Standard deviation     8.85408 8.80841 8.78689 8.7356 8.65563 8.58392 8.57598
## Proportion of Variance 0.00247 0.00244 0.00243 0.0024 0.00236 0.00232 0.00231
## Cumulative Proportion  0.83095 0.83339 0.83582 0.8382 0.84058 0.84290 0.84521
##                          PC83    PC84    PC85    PC86    PC87    PC88    PC89
## Standard deviation     8.5430 8.43957 8.40618 8.39298 8.37405 8.32031 8.31543
## Proportion of Variance 0.0023 0.00224 0.00222 0.00222 0.00221 0.00218 0.00218
## Cumulative Proportion  0.8475 0.84975 0.85197 0.85418 0.85639 0.85857 0.86074
##                           PC90    PC91   PC92    PC93    PC94    PC95    PC96
## Standard deviation     8.23455 8.19348 8.1782 8.09924 8.08106 8.00326 7.99813
## Proportion of Variance 0.00213 0.00211 0.0021 0.00206 0.00205 0.00202 0.00201
## Cumulative Proportion  0.86288 0.86499 0.8671 0.86916 0.87121 0.87323 0.87524
##                           PC97    PC98    PC99   PC100   PC101   PC102   PC103
## Standard deviation     7.95493 7.92699 7.91154 7.87776 7.83079 7.81440 7.74768
## Proportion of Variance 0.00199 0.00198 0.00197 0.00195 0.00193 0.00192 0.00189
## Cumulative Proportion  0.87723 0.87921 0.88118 0.88313 0.88506 0.88698 0.88887
##                          PC104   PC105   PC106   PC107   PC108   PC109   PC110
## Standard deviation     7.73254 7.68775 7.67425 7.65054 7.62311 7.58639 7.54246
## Proportion of Variance 0.00188 0.00186 0.00185 0.00184 0.00183 0.00181 0.00179
## Cumulative Proportion  0.89075 0.89261 0.89446 0.89630 0.89813 0.89994 0.90173
##                          PC111   PC112   PC113   PC114   PC115   PC116  PC117
## Standard deviation     7.49990 7.46850 7.44663 7.42236 7.40376 7.38471 7.3515
## Proportion of Variance 0.00177 0.00175 0.00174 0.00173 0.00172 0.00172 0.0017
## Cumulative Proportion  0.90350 0.90525 0.90700 0.90873 0.91046 0.91217 0.9139
##                          PC118   PC119   PC120   PC121   PC122   PC123   PC124
## Standard deviation     7.32355 7.27552 7.24639 7.20976 7.18682 7.16815 7.15625
## Proportion of Variance 0.00169 0.00167 0.00165 0.00164 0.00162 0.00162 0.00161
## Cumulative Proportion  0.91556 0.91722 0.91888 0.92051 0.92214 0.92375 0.92536
##                         PC125   PC126   PC127   PC128   PC129   PC130   PC131
## Standard deviation     7.1277 7.10048 7.09176 7.06845 7.03026 6.99342 6.97223
## Proportion of Variance 0.0016 0.00159 0.00158 0.00157 0.00155 0.00154 0.00153
## Cumulative Proportion  0.9270 0.92855 0.93013 0.93170 0.93326 0.93479 0.93632
##                          PC132   PC133   PC134   PC135   PC136   PC137   PC138
## Standard deviation     6.96033 6.92977 6.88904 6.86298 6.83919 6.82806 6.80890
## Proportion of Variance 0.00152 0.00151 0.00149 0.00148 0.00147 0.00147 0.00146
## Cumulative Proportion  0.93785 0.93936 0.94085 0.94233 0.94381 0.94527 0.94673
##                          PC139   PC140   PC141   PC142  PC143  PC144   PC145
## Standard deviation     6.77021 6.75057 6.72850 6.70192 6.6795 6.6734 6.63239
## Proportion of Variance 0.00144 0.00143 0.00142 0.00141 0.0014 0.0014 0.00138
## Cumulative Proportion  0.94817 0.94961 0.95103 0.95244 0.9538 0.9553 0.95663
##                          PC146   PC147   PC148   PC149   PC150   PC151   PC152
## Standard deviation     6.59626 6.56552 6.55772 6.54689 6.53551 6.50186 6.48074
## Proportion of Variance 0.00137 0.00136 0.00135 0.00135 0.00134 0.00133 0.00132
## Cumulative Proportion  0.95800 0.95936 0.96071 0.96206 0.96340 0.96473 0.96605
##                          PC153   PC154   PC155   PC156   PC157   PC158   PC159
## Standard deviation     6.46046 6.41174 6.37304 6.35262 6.34031 6.33220 6.28801
## Proportion of Variance 0.00131 0.00129 0.00128 0.00127 0.00126 0.00126 0.00124
## Cumulative Proportion  0.96737 0.96866 0.96994 0.97121 0.97247 0.97373 0.97498
##                          PC160   PC161   PC162  PC163   PC164   PC165   PC166
## Standard deviation     6.26874 6.23603 6.21546 6.1750 6.14416 6.10423 6.07537
## Proportion of Variance 0.00124 0.00122 0.00122 0.0012 0.00119 0.00117 0.00116
## Cumulative Proportion  0.97621 0.97744 0.97865 0.9798 0.98104 0.98221 0.98337
##                          PC167   PC168   PC169   PC170   PC171   PC172   PC173
## Standard deviation     6.03681 5.99665 5.98718 5.96258 5.93709 5.83782 5.83298
## Proportion of Variance 0.00115 0.00113 0.00113 0.00112 0.00111 0.00107 0.00107
## Cumulative Proportion  0.98452 0.98565 0.98678 0.98790 0.98900 0.99008 0.99115
##                          PC174   PC175   PC176   PC177   PC178   PC179   PC180
## Standard deviation     5.80251 5.77446 5.72864 5.67400 5.66476 5.50592 5.46563
## Proportion of Variance 0.00106 0.00105 0.00103 0.00101 0.00101 0.00095 0.00094
## Cumulative Proportion  0.99221 0.99325 0.99429 0.99530 0.99631 0.99726 0.99820
##                          PC181   PC182     PC183
## Standard deviation     5.41786 5.26986 1.531e-13
## Proportion of Variance 0.00092 0.00087 0.000e+00
## Cumulative Proportion  0.99913 1.00000 1.000e+00
# Collect the external_ids of samples annotated as "Solid Tissue Normal" and
# draw them in black; every other sample is drawn in red.
nt <- paad_tcga_metadata_order$external_id[
  paad_tcga_metadata_order$cgc_sample_sample_type == "Solid Tissue Normal"
]

tumor_norm <- ifelse(paad_tcga_metadata_order$external_id %in% nt, "black", "red")
# cex.axis / cex.lab are given as numbers rather than the strings "1.5".
plot(
  pca.tumor$x[, 1], pca.tumor$x[, 2],
  pch = 20, col = tumor_norm,
  main = "PCA of PAAD",
  xlab = "PC1 (12.7%)", ylab = "PC2 (10.74%)",
  cex.axis = 1.5, cex.lab = 1.5
)
legend(
  "topleft",
  legend = c("tumor", "normal"),
  pch = 21, pt.bg = c("red", "black"), col = "black"
)

# Pull the metadata rows of the samples with PC1 scores above 150 for manual
# inspection.
test <- as.data.frame(paad_tcga_metadata_order[pca.tumor$x[, 1] > 150, ])

# Collect the external_ids of samples whose "other histological diagnosis"
# mentions NEUROENDOCRINE (case-insensitive) and draw them in black; every
# other sample is drawn in red.
nt <- paad_tcga_metadata_order$external_id[
  grep(
    "NEUROENDOCRINE",
    paad_tcga_metadata_order$cgc_case_other_histological_diagnosis,
    ignore.case = TRUE
  )
]
#normal_ids<- rownames(recount3_rse_PAAD@colData)[rownames(recount3_rse_PAAD@colData) %in% nt]

tumor_norm <- ifelse(paad_tcga_metadata_order$external_id %in% nt, "black", "red")
# cex.axis / cex.lab are given as numbers rather than the strings "1.5".
plot(
  pca.tumor$x[, 1], pca.tumor$x[, 2],
  pch = 20, col = tumor_norm,
  main = "PCA of PAAD",
  xlab = "PC1 (12.7%)", ylab = "PC2 (10.74%)",
  cex.axis = 1.5, cex.lab = 1.5
)
legend(
  "topleft",
  legend = c("other", "NEUROENDOCRINE tumors"),
  pch = 21, pt.bg = c("red", "black"), col = "black"
)

remove neuroendocrine tumors

# Remove the neuroendocrine tumors from both the metadata and the
# VST-transformed expression matrix, then recompute the PCA.

# TCGA barcodes of the samples whose "other histological diagnosis" field
# mentions NEUROENDOCRINE (case-insensitive match).
nt <- paad_tcga_metadata_order$tcga_barcode[
  grep(
    "NEUROENDOCRINE",
    paad_tcga_metadata_order$cgc_case_other_histological_diagnosis,
    ignore.case = TRUE
  )
]

# Drop those samples from the metadata. The trailing comma is essential:
# without it the logical vector indexes COLUMNS of the data frame (recycled
# across them), so no sample is removed and the metadata stops lining up with
# the filtered expression matrix used for the PCA below.
metadata <- as.data.frame(paad_tcga_metadata_order)[
  !paad_tcga_metadata_order$tcga_barcode %in% nt, ,
  drop = FALSE
]

# Drop the same samples from the expression matrix; its columns are matched
# against the barcodes collected in nt.
vst_table_v2 <- vst_table[, !colnames(vst_table) %in% nt]

# prcomp() expects samples in rows, so transpose before running the PCA.
vst_table_df <- t(vst_table_v2)
pca.tumor <- prcomp(vst_table_df)
summary(pca.tumor)
## Importance of components:
##                            PC1      PC2      PC3      PC4      PC5      PC6
## Standard deviation     62.4031 46.88721 41.53646 36.47094 35.14795 30.09497
## Proportion of Variance  0.1367  0.07716  0.06056  0.04669  0.04336  0.03179
## Cumulative Proportion   0.1367  0.21384  0.27440  0.32108  0.36445  0.39623
##                             PC7      PC8     PC9     PC10     PC11     PC12
## Standard deviation     28.97007 25.23217 22.4547 21.65683 20.47935 18.78061
## Proportion of Variance  0.02946  0.02235  0.0177  0.01646  0.01472  0.01238
## Cumulative Proportion   0.42569  0.44804  0.4657  0.48220  0.49692  0.50930
##                            PC13     PC14     PC15     PC16     PC17     PC18
## Standard deviation     18.68885 18.19049 17.63742 16.58814 16.40326 15.88163
## Proportion of Variance  0.01226  0.01161  0.01092  0.00966  0.00944  0.00885
## Cumulative Proportion   0.52156  0.53317  0.54409  0.55375  0.56319  0.57205
##                            PC19     PC20     PC21     PC22    PC23     PC24
## Standard deviation     15.58815 15.08570 14.94203 14.62833 14.2210 14.17681
## Proportion of Variance  0.00853  0.00799  0.00784  0.00751  0.0071  0.00705
## Cumulative Proportion   0.58057  0.58856  0.59640  0.60391  0.6110  0.61806
##                            PC25     PC26     PC27     PC28     PC29     PC30
## Standard deviation     13.74874 13.68578 13.43291 13.24861 13.15758 12.94523
## Proportion of Variance  0.00663  0.00657  0.00633  0.00616  0.00608  0.00588
## Cumulative Proportion   0.62470  0.63127  0.63760  0.64376  0.64984  0.65572
##                            PC31     PC32     PC33     PC34     PC35    PC36
## Standard deviation     12.59040 12.29439 11.89921 11.78698 11.75442 11.6898
## Proportion of Variance  0.00556  0.00531  0.00497  0.00488  0.00485  0.0048
## Cumulative Proportion   0.66129  0.66659  0.67156  0.67644  0.68129  0.6861
##                            PC37     PC38     PC39     PC40     PC41     PC42
## Standard deviation     11.56517 11.44123 11.29835 11.25786 11.12692 10.97630
## Proportion of Variance  0.00469  0.00459  0.00448  0.00445  0.00435  0.00423
## Cumulative Proportion   0.69078  0.69537  0.69985  0.70430  0.70865  0.71288
##                            PC43    PC44     PC45     PC46     PC47     PC48
## Standard deviation     10.90700 10.8033 10.63793 10.57471 10.50130 10.42558
## Proportion of Variance  0.00418  0.0041  0.00397  0.00392  0.00387  0.00382
## Cumulative Proportion   0.71705  0.7211  0.72512  0.72905  0.73292  0.73673
##                            PC49     PC50     PC51     PC52     PC53    PC54
## Standard deviation     10.31270 10.25271 10.17429 10.10076 10.07905 9.96409
## Proportion of Variance  0.00373  0.00369  0.00363  0.00358  0.00357 0.00348
## Cumulative Proportion   0.74046  0.74415  0.74779  0.75137  0.75493 0.75842
##                           PC55    PC56    PC57    PC58    PC59    PC60    PC61
## Standard deviation     9.93805 9.88342 9.80848 9.75474 9.67990 9.49513 9.46514
## Proportion of Variance 0.00347 0.00343 0.00338 0.00334 0.00329 0.00316 0.00314
## Cumulative Proportion  0.76188 0.76531 0.76869 0.77203 0.77532 0.77848 0.78163
##                           PC62    PC63    PC64   PC65    PC66    PC67    PC68
## Standard deviation     9.41030 9.35724 9.27998 9.2392 9.19203 9.16247 9.10110
## Proportion of Variance 0.00311 0.00307 0.00302 0.0030 0.00297 0.00295 0.00291
## Cumulative Proportion  0.78474 0.78781 0.79083 0.7938 0.79679 0.79974 0.80265
##                           PC69    PC70    PC71   PC72    PC73    PC74   PC75
## Standard deviation     9.07221 9.05062 9.00027 8.9372 8.85509 8.83956 8.7717
## Proportion of Variance 0.00289 0.00288 0.00284 0.0028 0.00275 0.00274 0.0027
## Cumulative Proportion  0.80554 0.80841 0.81125 0.8141 0.81681 0.81955 0.8223
##                           PC76    PC77    PC78    PC79    PC80    PC81    PC82
## Standard deviation     8.69155 8.66293 8.64709 8.58798 8.56476 8.53237 8.49246
## Proportion of Variance 0.00265 0.00263 0.00262 0.00259 0.00257 0.00256 0.00253
## Cumulative Proportion  0.82490 0.82754 0.83016 0.83275 0.83533 0.83788 0.84041
##                           PC83    PC84    PC85    PC86    PC87    PC88    PC89
## Standard deviation     8.40163 8.38560 8.32498 8.29403 8.21743 8.20029 8.17371
## Proportion of Variance 0.00248 0.00247 0.00243 0.00241 0.00237 0.00236 0.00234
## Cumulative Proportion  0.84289 0.84536 0.84779 0.85021 0.85258 0.85494 0.85728
##                           PC90   PC91    PC92    PC93    PC94    PC95    PC96
## Standard deviation     8.15159 8.0889 8.07267 8.03202 7.97668 7.95963 7.94826
## Proportion of Variance 0.00233 0.0023 0.00229 0.00226 0.00223 0.00222 0.00222
## Cumulative Proportion  0.85961 0.8619 0.86420 0.86646 0.86869 0.87092 0.87314
##                          PC97    PC98    PC99   PC100   PC101  PC102   PC103
## Standard deviation     7.9163 7.87493 7.84841 7.82560 7.77542 7.7298 7.68903
## Proportion of Variance 0.0022 0.00218 0.00216 0.00215 0.00212 0.0021 0.00208
## Cumulative Proportion  0.8753 0.87751 0.87967 0.88182 0.88395 0.8860 0.88812
##                          PC104   PC105   PC106   PC107   PC108  PC109   PC110
## Standard deviation     7.68714 7.65129 7.61265 7.60194 7.57893 7.5422 7.53265
## Proportion of Variance 0.00207 0.00205 0.00203 0.00203 0.00202 0.0020 0.00199
## Cumulative Proportion  0.89019 0.89225 0.89428 0.89631 0.89832 0.9003 0.90231
##                          PC111   PC112   PC113   PC114  PC115   PC116   PC117
## Standard deviation     7.46367 7.41557 7.39365 7.36910 7.3620 7.33842 7.31735
## Proportion of Variance 0.00196 0.00193 0.00192 0.00191 0.0019 0.00189 0.00188
## Cumulative Proportion  0.90427 0.90620 0.90812 0.91002 0.9119 0.91382 0.91570
##                          PC118   PC119   PC120   PC121   PC122   PC123   PC124
## Standard deviation     7.29276 7.25964 7.24399 7.18968 7.17512 7.14799 7.11589
## Proportion of Variance 0.00187 0.00185 0.00184 0.00181 0.00181 0.00179 0.00178
## Cumulative Proportion  0.91756 0.91941 0.92125 0.92307 0.92487 0.92667 0.92845
##                          PC125   PC126   PC127   PC128   PC129   PC130   PC131
## Standard deviation     7.08893 7.06877 7.05156 7.02036 6.99566 6.97352 6.93825
## Proportion of Variance 0.00176 0.00175 0.00175 0.00173 0.00172 0.00171 0.00169
## Cumulative Proportion  0.93021 0.93196 0.93371 0.93544 0.93716 0.93886 0.94055
##                          PC132   PC133   PC134   PC135   PC136   PC137   PC138
## Standard deviation     6.91710 6.89225 6.88361 6.85781 6.83451 6.78593 6.76803
## Proportion of Variance 0.00168 0.00167 0.00166 0.00165 0.00164 0.00162 0.00161
## Cumulative Proportion  0.94223 0.94390 0.94556 0.94721 0.94885 0.95047 0.95208
##                          PC139   PC140   PC141   PC142   PC143   PC144   PC145
## Standard deviation     6.73768 6.72154 6.71289 6.68377 6.68042 6.64084 6.59101
## Proportion of Variance 0.00159 0.00159 0.00158 0.00157 0.00157 0.00155 0.00152
## Cumulative Proportion  0.95367 0.95526 0.95684 0.95841 0.95997 0.96152 0.96304
##                          PC146  PC147   PC148   PC149   PC150   PC151   PC152
## Standard deviation     6.57244 6.5270 6.52185 6.49955 6.47523 6.45629 6.40909
## Proportion of Variance 0.00152 0.0015 0.00149 0.00148 0.00147 0.00146 0.00144
## Cumulative Proportion  0.96456 0.9661 0.96755 0.96903 0.97050 0.97197 0.97341
##                          PC153   PC154  PC155   PC156   PC157   PC158   PC159
## Standard deviation     6.38055 6.36164 6.3069 6.26724 6.23827 6.20937 6.15954
## Proportion of Variance 0.00143 0.00142 0.0014 0.00138 0.00137 0.00135 0.00133
## Cumulative Proportion  0.97484 0.97626 0.9777 0.97903 0.98040 0.98175 0.98308
##                          PC160   PC161   PC162   PC163   PC164   PC165   PC166
## Standard deviation     6.14947 6.13003 6.10119 6.00558 5.98531 5.95604 5.93735
## Proportion of Variance 0.00133 0.00132 0.00131 0.00127 0.00126 0.00125 0.00124
## Cumulative Proportion  0.98441 0.98573 0.98704 0.98830 0.98956 0.99080 0.99204
##                          PC167  PC168   PC169   PC170   PC171   PC172   PC173
## Standard deviation     5.90130 5.8379 5.82042 5.66063 5.62063 5.56764 5.41600
## Proportion of Variance 0.00122 0.0012 0.00119 0.00112 0.00111 0.00109 0.00103
## Cumulative Proportion  0.99326 0.9945 0.99565 0.99677 0.99788 0.99897 1.00000
##                            PC174
## Standard deviation     1.531e-13
## Proportion of Variance 0.000e+00
## Cumulative Proportion  1.000e+00
# PCA of PAAD after removing the neuroendocrine tumors, coloured by sample type.
# nt holds the TCGA barcodes annotated "Solid Tissue Normal".
nt <- metadata$tcga_barcode[
  metadata$cgc_sample_sample_type == "Solid Tissue Normal"
]
#normal_ids<- rownames(metadata)[rownames(metadata) %in% nt]

# Normals in black, every other sample in red.
tumor_norm <- rep("red", length(metadata$tcga_barcode))
tumor_norm[metadata$tcga_barcode %in% nt] <- "black"
plot(
  pca.tumor$x[, 1],
  pca.tumor$x[, 2],
  pch = 20,
  col = tumor_norm,
  main = "PCA of PAAD",
  xlab = "PC1 (13.67%)", # percentages copied from the summary(pca.tumor) table
  ylab = "PC2 (7.716%)",
  cex.axis = "1.5",
  cex.lab = "1.5"
)
legend(
  "topleft",
  legend = c("tumor", "normal"),
  pch = 21,
  pt.bg = c("red", "black"),
  col = "black"
)

# Print the samples sitting at the far negative end of PC1 (score below -130).
rownames(pca.tumor$x)[pca.tumor$x[, 1] < -130]
## [1] "TCGA-HV-A7OP-01A-11R-A33R-07" "TCGA-HZ-7289-01A-11R-2156-07"
## [3] "TCGA-FB-AAPP-01A-12R-A41B-07" "TCGA-FB-AAQ0-01A-31R-A41B-07"
# Keep the names of the PC1 outliers (score below -130) and pull the metadata
# rows whose row names match them, for manual inspection.
list <- rownames(pca.tumor$x)[pca.tumor$x[, 1] < -130]
metadata_test <- metadata[rownames(metadata) %in% list, ]

# Scan the higher components for tumor/normal separation. Each plot shows one
# component against the next one: PC3/4, PC5/6, PC6/7, PC8/9 and PC10/11.
# Colours come from the existing tumor_norm vector.
for (first_pc in c(3, 5, 6, 8, 10)) {
  second_pc <- first_pc + 1
  plot(
    pca.tumor$x[, first_pc],
    pca.tumor$x[, second_pc],
    pch = 20,
    col = tumor_norm,
    main = "PCA of PAAD",
    xlab = paste0("PC", first_pc),
    ylab = paste0("PC", second_pc),
    cex.axis = "1.5",
    cex.lab = "1.5"
  )
  legend(
    "topleft",
    legend = c("tumor", "normal"),
    pch = 21,
    pt.bg = c("red", "black"),
    col = "black"
  )
}

# Metadata rows whose row names appear in nt, kept for checking the labels.
metadata_test <- metadata[rownames(metadata) %in% nt, ]

correctly label

Note that PAAD tumors contain a large proportion of cancer-associated fibroblasts (i.e., low tumor purity), which is why the tumor and normal samples do not separate.

Liver cancer

import the liver data

# Read the GTEx liver count matrix and its sample metadata.
liver_gtex_counts <- readRDS(
  "~/data/recount3/recount3_fix_download/liver_gtex_counts.rds"
)
liver_gtex_metadata <- readRDS(
  "~/data/recount3/recount3_fix_download/liver_gtex_metadata.rds"
)

# Peek at the first five sample ids on each side: the count columns use "."
# as separator while the metadata ids use "-", and the two are in a
# different order.
head(colnames(liver_gtex_counts), 5)
## [1] "GTEX.13PVQ.1526.SM.5IFEQ.1" "GTEX.UTHO.2426.SM.4JBHD.1" 
## [3] "GTEX.13NZB.0626.SM.5IFH6.1" "GTEX.ZPU1.0826.SM.57WG2.1" 
## [5] "GTEX.XBEC.1526.SM.4AT68.1"
head(liver_gtex_metadata$external_id, 5)
## [1] "GTEX-1192X-1026-SM-5H12P.1" "GTEX-11DXY-0526-SM-5EGGQ.1"
## [3] "GTEX-11DXZ-0126-SM-5EGGY.1" "GTEX-11EQ9-0526-SM-5A5JZ.1"
## [5] "GTEX-11GSP-0626-SM-5986T.1"

# Normalise both id sets by turning every punctuation character into "-",
# then confirm that, once sorted, they hold exactly the same ids.
ids <- str_replace_all(
  colnames(liver_gtex_counts), "[[:punct:]]", "-"
)
meta_data_ids <- str_replace_all(
  liver_gtex_metadata$external_id, "[[:punct:]]", "-"
)
identical(ids[order(ids)], meta_data_ids[order(meta_data_ids)])
## [1] TRUE

# Sort counts (columns) and metadata (rows) by the normalised ids so that they
# share one order, then relabel the count columns with the metadata ids.
liver_gtex_counts_order <- liver_gtex_counts[, order(ids)]
liver_gtex_metadata_order <- liver_gtex_metadata[order(meta_data_ids), ]
colnames(liver_gtex_counts_order) <- liver_gtex_metadata_order$external_id

# Save the ordered counts (as a data frame) and the ordered metadata.
recount3_count_liver <- as.data.frame(liver_gtex_counts_order)
saveRDS(
  recount3_count_liver,
  "/home/rstudio/data/liver_gtex_count_ordered.rds"
)
saveRDS(
  liver_gtex_metadata_order,
  "/home/rstudio/data/liver_gtex_metadata_ordered.rds"
)
library(DESeq2)
#colData(recount3_rse_PANCREAS)
#counts_liver[is.na(counts_liver)] <- 0

# One sample has very high counts; it is removed before the vst transformation.
counts_liver <- recount3_count_liver[
  , colnames(recount3_count_liver) != "GTEX-WK11-1326-SM-4OOSI.1"
]

# Variance-stabilise the remaining counts.
vst_table <- vst(as.matrix(counts_liver))

# PCA on the transposed table so that samples are the rows.
pca.tumor <- prcomp(t(vst_table))
summary(pca.tumor)
## Importance of components:
##                            PC1      PC2      PC3     PC4      PC5      PC6
## Standard deviation     52.5967 35.94987 30.28704 23.8062 21.27946 18.65812
## Proportion of Variance  0.1909  0.08916  0.06329  0.0391  0.03124  0.02402
## Cumulative Proportion   0.1909  0.28003  0.34331  0.3824  0.41365  0.43767
##                             PC7      PC8      PC9     PC10     PC11     PC12
## Standard deviation     16.86723 16.45599 15.37530 14.47524 14.24092 12.92163
## Proportion of Variance  0.01963  0.01868  0.01631  0.01446  0.01399  0.01152
## Cumulative Proportion   0.45730  0.47598  0.49229  0.50675  0.52074  0.53226
##                            PC13    PC14     PC15     PC16    PC17     PC18
## Standard deviation     12.52814 12.4530 11.97517 11.64336 11.3602 10.54322
## Proportion of Variance  0.01083  0.0107  0.00989  0.00935  0.0089  0.00767
## Cumulative Proportion   0.54309  0.5538  0.56368  0.57303  0.5819  0.58961
##                            PC19    PC20    PC21    PC22    PC23    PC24    PC25
## Standard deviation     10.37654 10.2128 9.76497 9.67642 9.50243 9.08517 8.93310
## Proportion of Variance  0.00743  0.0072 0.00658 0.00646 0.00623 0.00569 0.00551
## Cumulative Proportion   0.59704  0.6042 0.61081 0.61727 0.62350 0.62919 0.63470
##                           PC26    PC27    PC28    PC29    PC30    PC31    PC32
## Standard deviation     8.80302 8.55262 8.38273 8.21609 7.99194 7.94219 7.88825
## Proportion of Variance 0.00535 0.00505 0.00485 0.00466 0.00441 0.00435 0.00429
## Cumulative Proportion  0.64005 0.64509 0.64994 0.65460 0.65900 0.66336 0.66765
##                          PC33   PC34    PC35    PC36   PC37    PC38    PC39
## Standard deviation     7.7988 7.7079 7.46246 7.34232 7.3271 7.24060 7.21482
## Proportion of Variance 0.0042 0.0041 0.00384 0.00372 0.0037 0.00362 0.00359
## Cumulative Proportion  0.6718 0.6759 0.67979 0.68351 0.6872 0.69083 0.69442
##                           PC40    PC41    PC42    PC43    PC44    PC45    PC46
## Standard deviation     7.09779 7.04855 6.90736 6.87666 6.82741 6.69147 6.66544
## Proportion of Variance 0.00348 0.00343 0.00329 0.00326 0.00322 0.00309 0.00307
## Cumulative Proportion  0.69789 0.70132 0.70461 0.70788 0.71109 0.71418 0.71725
##                           PC47    PC48    PC49    PC50    PC51    PC52   PC53
## Standard deviation     6.64234 6.56392 6.51980 6.46102 6.41040 6.27349 6.2539
## Proportion of Variance 0.00304 0.00297 0.00293 0.00288 0.00284 0.00272 0.0027
## Cumulative Proportion  0.72029 0.72326 0.72620 0.72908 0.73191 0.73463 0.7373
##                           PC54    PC55    PC56    PC57    PC58    PC59    PC60
## Standard deviation     6.19696 6.18021 6.10989 6.09423 5.96869 5.95260 5.90809
## Proportion of Variance 0.00265 0.00264 0.00258 0.00256 0.00246 0.00244 0.00241
## Cumulative Proportion  0.73997 0.74261 0.74518 0.74775 0.75020 0.75265 0.75506
##                           PC61    PC62    PC63    PC64    PC65    PC66    PC67
## Standard deviation     5.86654 5.81341 5.73502 5.71785 5.68002 5.63381 5.58592
## Proportion of Variance 0.00237 0.00233 0.00227 0.00226 0.00223 0.00219 0.00215
## Cumulative Proportion  0.75743 0.75976 0.76203 0.76429 0.76651 0.76870 0.77086
##                           PC68    PC69   PC70    PC71    PC72    PC73   PC74
## Standard deviation     5.55208 5.55071 5.5175 5.48626 5.42060 5.39719 5.3862
## Proportion of Variance 0.00213 0.00213 0.0021 0.00208 0.00203 0.00201 0.0020
## Cumulative Proportion  0.77298 0.77511 0.7772 0.77929 0.78131 0.78332 0.7853
##                           PC75    PC76    PC77    PC78    PC79    PC80    PC81
## Standard deviation     5.34500 5.32935 5.30261 5.26732 5.26020 5.23354 5.21590
## Proportion of Variance 0.00197 0.00196 0.00194 0.00191 0.00191 0.00189 0.00188
## Cumulative Proportion  0.78730 0.78925 0.79119 0.79311 0.79502 0.79691 0.79878
##                           PC82    PC83    PC84   PC85   PC86    PC87    PC88
## Standard deviation     5.19696 5.17214 5.15230 5.1146 5.1073 5.08535 5.05372
## Proportion of Variance 0.00186 0.00185 0.00183 0.0018 0.0018 0.00178 0.00176
## Cumulative Proportion  0.80065 0.80249 0.80433 0.8061 0.8079 0.80971 0.81148
##                           PC89    PC90    PC91    PC92    PC93    PC94    PC95
## Standard deviation     5.04817 5.02171 4.98902 4.97151 4.95648 4.93633 4.92196
## Proportion of Variance 0.00176 0.00174 0.00172 0.00171 0.00169 0.00168 0.00167
## Cumulative Proportion  0.81323 0.81497 0.81669 0.81840 0.82009 0.82177 0.82344
##                           PC96    PC97    PC98    PC99   PC100   PC101  PC102
## Standard deviation     4.91153 4.89032 4.88584 4.87962 4.85498 4.82651 4.8155
## Proportion of Variance 0.00166 0.00165 0.00165 0.00164 0.00163 0.00161 0.0016
## Cumulative Proportion  0.82511 0.82676 0.82840 0.83005 0.83167 0.83328 0.8349
##                          PC103   PC104   PC105   PC106   PC107   PC108   PC109
## Standard deviation     4.80713 4.78770 4.77864 4.75624 4.72984 4.71947 4.70393
## Proportion of Variance 0.00159 0.00158 0.00158 0.00156 0.00154 0.00154 0.00153
## Cumulative Proportion  0.83648 0.83806 0.83963 0.84119 0.84274 0.84427 0.84580
##                          PC110   PC111  PC112  PC113   PC114   PC115   PC116
## Standard deviation     4.68968 4.68400 4.6694 4.6620 4.65009 4.64362 4.63095
## Proportion of Variance 0.00152 0.00151 0.0015 0.0015 0.00149 0.00149 0.00148
## Cumulative Proportion  0.84732 0.84883 0.8503 0.8518 0.85333 0.85481 0.85629
##                          PC117   PC118   PC119   PC120   PC121   PC122   PC123
## Standard deviation     4.61497 4.60545 4.60076 4.57736 4.55731 4.54381 4.53646
## Proportion of Variance 0.00147 0.00146 0.00146 0.00145 0.00143 0.00142 0.00142
## Cumulative Proportion  0.85776 0.85923 0.86069 0.86213 0.86356 0.86499 0.86641
##                          PC124   PC125  PC126   PC127   PC128   PC129   PC130
## Standard deviation     4.52887 4.51799 4.4985 4.49233 4.47698 4.46410 4.44995
## Proportion of Variance 0.00142 0.00141 0.0014 0.00139 0.00138 0.00137 0.00137
## Cumulative Proportion  0.86782 0.86923 0.8706 0.87202 0.87340 0.87478 0.87614
##                          PC131   PC132   PC133   PC134   PC135   PC136   PC137
## Standard deviation     4.43709 4.42440 4.41385 4.40879 4.39936 4.39410 4.37551
## Proportion of Variance 0.00136 0.00135 0.00134 0.00134 0.00134 0.00133 0.00132
## Cumulative Proportion  0.87750 0.87885 0.88020 0.88154 0.88287 0.88421 0.88553
##                          PC138   PC139  PC140  PC141   PC142   PC143   PC144
## Standard deviation     4.36735 4.35527 4.3452 4.3347 4.31760 4.31106 4.30843
## Proportion of Variance 0.00132 0.00131 0.0013 0.0013 0.00129 0.00128 0.00128
## Cumulative Proportion  0.88684 0.88815 0.8894 0.8908 0.89204 0.89332 0.89460
##                          PC145   PC146   PC147   PC148   PC149   PC150   PC151
## Standard deviation     4.30116 4.28136 4.27973 4.27833 4.26897 4.25527 4.24456
## Proportion of Variance 0.00128 0.00126 0.00126 0.00126 0.00126 0.00125 0.00124
## Cumulative Proportion  0.89588 0.89714 0.89840 0.89967 0.90092 0.90217 0.90342
##                          PC152   PC153   PC154   PC155   PC156   PC157   PC158
## Standard deviation     4.23175 4.22609 4.22287 4.21212 4.19407 4.19254 4.18540
## Proportion of Variance 0.00124 0.00123 0.00123 0.00122 0.00121 0.00121 0.00121
## Cumulative Proportion  0.90465 0.90588 0.90711 0.90834 0.90955 0.91076 0.91197
##                          PC159  PC160   PC161   PC162   PC163   PC164   PC165
## Standard deviation     4.18148 4.1757 4.15945 4.15326 4.15064 4.13266 4.13060
## Proportion of Variance 0.00121 0.0012 0.00119 0.00119 0.00119 0.00118 0.00118
## Cumulative Proportion  0.91318 0.9144 0.91558 0.91677 0.91796 0.91913 0.92031
##                          PC166   PC167   PC168   PC169   PC170   PC171   PC172
## Standard deviation     4.12239 4.10979 4.10381 4.08049 4.07807 4.07175 4.06428
## Proportion of Variance 0.00117 0.00117 0.00116 0.00115 0.00115 0.00114 0.00114
## Cumulative Proportion  0.92148 0.92265 0.92381 0.92496 0.92611 0.92725 0.92839
##                          PC173   PC174   PC175   PC176   PC177   PC178   PC179
## Standard deviation     4.05928 4.04905 4.04227 4.03604 4.03148 4.01621 4.00615
## Proportion of Variance 0.00114 0.00113 0.00113 0.00112 0.00112 0.00111 0.00111
## Cumulative Proportion  0.92953 0.93066 0.93179 0.93291 0.93403 0.93514 0.93625
##                          PC180  PC181  PC182   PC183   PC184   PC185   PC186
## Standard deviation     4.00344 3.9970 3.9847 3.98190 3.97795 3.97097 3.96063
## Proportion of Variance 0.00111 0.0011 0.0011 0.00109 0.00109 0.00109 0.00108
## Cumulative Proportion  0.93736 0.9385 0.9395 0.94065 0.94174 0.94283 0.94391
##                          PC187   PC188   PC189   PC190   PC191   PC192   PC193
## Standard deviation     3.94925 3.94394 3.93558 3.92959 3.91346 3.90818 3.89527
## Proportion of Variance 0.00108 0.00107 0.00107 0.00107 0.00106 0.00105 0.00105
## Cumulative Proportion  0.94499 0.94606 0.94713 0.94819 0.94925 0.95030 0.95135
##                          PC194   PC195   PC196   PC197   PC198   PC199   PC200
## Standard deviation     3.88545 3.88213 3.87032 3.86698 3.86364 3.85295 3.83998
## Proportion of Variance 0.00104 0.00104 0.00103 0.00103 0.00103 0.00102 0.00102
## Cumulative Proportion  0.95239 0.95343 0.95446 0.95550 0.95653 0.95755 0.95857
##                          PC201   PC202   PC203  PC204  PC205   PC206   PC207
## Standard deviation     3.82618 3.82292 3.81866 3.8102 3.8091 3.79490 3.78678
## Proportion of Variance 0.00101 0.00101 0.00101 0.0010 0.0010 0.00099 0.00099
## Cumulative Proportion  0.95958 0.96059 0.96159 0.9626 0.9636 0.96459 0.96558
##                          PC208   PC209   PC210   PC211   PC212   PC213   PC214
## Standard deviation     3.77731 3.77639 3.75850 3.75212 3.74980 3.73737 3.73401
## Proportion of Variance 0.00098 0.00098 0.00097 0.00097 0.00097 0.00096 0.00096
## Cumulative Proportion  0.96656 0.96755 0.96852 0.96949 0.97046 0.97143 0.97239
##                          PC215   PC216   PC217   PC218   PC219   PC220   PC221
## Standard deviation     3.72930 3.71798 3.70684 3.69653 3.68231 3.67427 3.66526
## Proportion of Variance 0.00096 0.00095 0.00095 0.00094 0.00094 0.00093 0.00093
## Cumulative Proportion  0.97335 0.97430 0.97525 0.97619 0.97713 0.97806 0.97899
##                          PC222   PC223   PC224  PC225   PC226   PC227   PC228
## Standard deviation     3.64503 3.63663 3.63108 3.6128 3.59799 3.59173 3.58447
## Proportion of Variance 0.00092 0.00091 0.00091 0.0009 0.00089 0.00089 0.00089
## Cumulative Proportion  0.97990 0.98081 0.98172 0.9826 0.98352 0.98441 0.98529
##                          PC229   PC230   PC231   PC232   PC233   PC234   PC235
## Standard deviation     3.58336 3.57029 3.55477 3.54652 3.53573 3.52205 3.50984
## Proportion of Variance 0.00089 0.00088 0.00087 0.00087 0.00086 0.00086 0.00085
## Cumulative Proportion  0.98618 0.98706 0.98793 0.98880 0.98966 0.99052 0.99137
##                          PC236   PC237   PC238   PC239   PC240   PC241   PC242
## Standard deviation     3.50034 3.48644 3.45560 3.44211 3.42502 3.38215 3.26243
## Proportion of Variance 0.00085 0.00084 0.00082 0.00082 0.00081 0.00079 0.00073
## Cumulative Proportion  0.99221 0.99305 0.99388 0.99469 0.99550 0.99629 0.99703
##                          PC243   PC244   PC245   PC246   PC247     PC248
## Standard deviation     3.22816 3.14439 3.06517 2.61396 2.56508 1.447e-13
## Proportion of Variance 0.00072 0.00068 0.00065 0.00047 0.00045 0.000e+00
## Cumulative Proportion  0.99774 0.99843 0.99907 0.99955 1.00000 1.000e+00
##                            PC249     PC250
## Standard deviation     2.336e-14 1.602e-14
## Proportion of Variance 0.000e+00 0.000e+00
## Cumulative Proportion  1.000e+00 1.000e+00
# Drop the sample that was excluded from the PCA from the metadata as well, so
# that the metadata rows match the PCA scores.
liver_gtex_metadata_order_v2 <- liver_gtex_metadata_order[
  liver_gtex_metadata_order$external_id != "GTEX-WK11-1326-SM-4OOSI.1",
]

# --- Sex: samples with SEX == "2" are drawn in red (labelled female) ---
sex <- liver_gtex_metadata_order_v2$external_id[
  liver_gtex_metadata_order_v2$SEX == "2"
]
sex <- sex[!is.na(sex)] # a missing SEX value yields an NA id; drop those

tumor_norm <- rep("black", length(liver_gtex_metadata_order_v2$external_id))
tumor_norm[liver_gtex_metadata_order_v2$external_id %in% sex] <- "red"
plot(
  pca.tumor$x[, 1],
  pca.tumor$x[, 2],
  pch = 20,
  col = tumor_norm,
  main = "PCA of GTEx Liver",
  xlab = "PC1 (19.09%)",
  ylab = "PC2 (8.92%)",
  cex.axis = "1.5",
  cex.lab = "1.5"
)
legend(
  "topleft",
  legend = c("female", "male/normal"),
  pch = 21,
  pt.bg = c("red", "black"),
  col = "black"
)

# --- Age: highlight the 70-79 bracket in red ---
age <- liver_gtex_metadata_order_v2$external_id[
  liver_gtex_metadata_order_v2$AGE == "70-79"
]
age <- age[!is.na(age)]


tumor_norm <- rep("black", length(liver_gtex_metadata_order_v2$external_id))
tumor_norm[liver_gtex_metadata_order_v2$external_id %in% age] <- "red"
plot(
  pca.tumor$x[, 1],
  pca.tumor$x[, 2],
  pch = 20,
  col = tumor_norm,
  main = "PCA of GTEx Liver",
  xlab = "PC1 (19.09%)",
  ylab = "PC2 (8.92%)",
  cex.axis = "1.5",
  cex.lab = "1.5"
)
legend(
  "topleft",
  legend = c("70-79", "! 70-79"),
  pch = 21,
  pt.bg = c("red", "black"),
  col = "black"
)

# --- Age: highlight the 20-29 bracket in red ---
age <- liver_gtex_metadata_order_v2$external_id[
  liver_gtex_metadata_order_v2$AGE == "20-29"
]
age <- age[!is.na(age)]

tumor_norm <- rep("black", length(liver_gtex_metadata_order_v2$external_id))
tumor_norm[liver_gtex_metadata_order_v2$external_id %in% age] <- "red"
plot(
  pca.tumor$x[, 1],
  pca.tumor$x[, 2],
  pch = 20,
  col = tumor_norm,
  main = "PCA of GTEx Liver",
  xlab = "PC1 (19.09%)",
  ylab = "PC2 (8.92%)",
  cex.axis = "1.5",
  cex.lab = "1.5"
)
legend(
  "topleft",
  legend = c("20-29", "! 20-29"),
  pch = 21,
  pt.bg = c("red", "black"),
  col = "black"
)

looking at the RIN score

# Samples with a RIN score (SMRIN) of at least 7 are drawn in red, the rest
# in black.
rin <- liver_gtex_metadata_order_v2$external_id[
  liver_gtex_metadata_order_v2$SMRIN >= 7
]
rin <- rin[!is.na(rin)] # a missing SMRIN value yields an NA id; drop those


tumor_norm <- rep("black", length(liver_gtex_metadata_order_v2$external_id))
tumor_norm[liver_gtex_metadata_order_v2$external_id %in% rin] <- "red"
plot(
  pca.tumor$x[, 1],
  pca.tumor$x[, 2],
  pch = 20,
  col = tumor_norm,
  main = "PCA of GTEx Liver",
  xlab = "PC1 (19.09%)",
  ylab = "PC2 (8.92%)",
  cex.axis = "1.5",
  cex.lab = "1.5"
)
legend(
  "topleft",
  legend = c(">= 7", "< 7"),
  pch = 21,
  pt.bg = c("red", "black"),
  col = "black"
)

SMTSISCH indicates the ischemic time in minutes.

# Samples with an ischemic time (SMTSISCH) of at least 500 are drawn in red,
# the rest in black.
time <- liver_gtex_metadata_order_v2$external_id[
  liver_gtex_metadata_order_v2$SMTSISCH >= 500
]
time <- time[!is.na(time)] # a missing SMTSISCH value yields an NA id; drop those

tumor_norm <- rep("black", length(liver_gtex_metadata_order_v2$external_id))
tumor_norm[liver_gtex_metadata_order_v2$external_id %in% time] <- "red"
plot(
  pca.tumor$x[, 1],
  pca.tumor$x[, 2],
  pch = 20,
  col = tumor_norm,
  main = "PCA of GTEx Liver",
  xlab = "PC1 (19.09%)",
  ylab = "PC2 (8.92%)",
  cex.axis = "1.5",
  cex.lab = "1.5"
)
legend(
  "bottomleft",
  legend = c(">= 500", "< 500"),
  pch = 21,
  pt.bg = c("red", "black"),
  col = "black"
)

LIHC tumor samples

# Read the TCGA LIHC count matrix and the matching sample metadata.
lihc_tcga_counts <- readRDS(
  "~/data/recount3/recount3_fix_download/lihc_tcga_counts.rds"
)
lihc_tcga_metadata <- readRDS(
  "~/data/recount3/recount3_fix_download/lihc_tcga_metadata.rds"
)

check counts and metadata info

# Sanity checks: the counts have one column per sample and the metadata one
# row per sample (424 each), and the first count column name is 36 characters
# long.
dim(lihc_tcga_counts)
## [1] 63856   424
dim(lihc_tcga_metadata)
## [1] 424 840
nchar(colnames(lihc_tcga_counts)[1])
## [1] 36

# Build the sample ids from the count column names: substrRight() is a helper
# defined outside this section (presumably keeps the right-most 36
# characters), then every punctuation character is replaced with "-".
ids <- str_replace_all(
  substrRight(colnames(lihc_tcga_counts), 36),
  "[[:punct:]]",
  "-"
)
colnames(lihc_tcga_counts) <- ids

# Put counts (columns) and metadata (rows) in the same sorted id order and
# confirm that they line up exactly.
lihc_tcga_counts_order <- lihc_tcga_counts[, order(ids)]
lihc_tcga_metadata_order <- lihc_tcga_metadata[
  order(lihc_tcga_metadata$external_id),
]
identical(
  colnames(lihc_tcga_counts_order),
  lihc_tcga_metadata_order$external_id
)
## [1] TRUE

# Relabel the count columns with the TCGA barcodes and save both ordered objects.
colnames(lihc_tcga_counts_order) <- lihc_tcga_metadata_order$tcga_barcode
saveRDS(
  lihc_tcga_counts_order,
  "/home/rstudio/data/lihc_tcga_count_ordered.rds"
)
saveRDS(
  lihc_tcga_metadata_order,
  "/home/rstudio/data/lihc_tcga_metadata_ordered.rds"
)

# Variance-stabilise the LIHC counts and run the PCA with samples as rows.
counts_liver <- lihc_tcga_counts_order

vst_table <- vst(as.matrix(counts_liver))

vst_table_df <- t(vst_table)
pca.tumor <- prcomp(vst_table_df)
summary(pca.tumor)
## Importance of components:
##                            PC1     PC2      PC3      PC4      PC5      PC6
## Standard deviation     66.2189 60.4903 50.83139 38.81353 37.14832 33.13239
## Proportion of Variance  0.1124  0.0938  0.06624  0.03862  0.03538  0.02814
## Cumulative Proportion   0.1124  0.2062  0.27245  0.31107  0.34645  0.37459
##                             PC7      PC8      PC9     PC10     PC11     PC12
## Standard deviation     30.03564 27.67645 24.72885 23.23548 22.41560 21.03548
## Proportion of Variance  0.02313  0.01964  0.01568  0.01384  0.01288  0.01134
## Cumulative Proportion   0.39771  0.41735  0.43303  0.44687  0.45975  0.47109
##                            PC13     PC14     PC15     PC16     PC17     PC18
## Standard deviation     20.59000 19.49061 18.96662 17.62103 17.25819 16.93320
## Proportion of Variance  0.01087  0.00974  0.00922  0.00796  0.00764  0.00735
## Cumulative Proportion   0.48196  0.49170  0.50092  0.50888  0.51651  0.52386
##                            PC19     PC20    PC21    PC22     PC23     PC24
## Standard deviation     15.93474 15.63198 15.5561 15.1757 14.87444 14.80210
## Proportion of Variance  0.00651  0.00626  0.0062  0.0059  0.00567  0.00562
## Cumulative Proportion   0.53037  0.53664  0.5428  0.5487  0.55442  0.56003
##                            PC25    PC26     PC27     PC28     PC29    PC30
## Standard deviation     14.11845 13.8301 13.75902 13.41666 13.28879 13.0966
## Proportion of Variance  0.00511  0.0049  0.00485  0.00461  0.00453  0.0044
## Cumulative Proportion   0.56514  0.5700  0.57490  0.57951  0.58404  0.5884
##                            PC31     PC32     PC33    PC34     PC35     PC36
## Standard deviation     12.93584 12.79002 12.48329 12.3358 12.20697 11.90233
## Proportion of Variance  0.00429  0.00419  0.00399  0.0039  0.00382  0.00363
## Cumulative Proportion   0.59273  0.59692  0.60092  0.6048  0.60864  0.61227
##                            PC37     PC38     PC39     PC40     PC41     PC42
## Standard deviation     11.81195 11.71324 11.45599 11.37234 11.28823 11.19063
## Proportion of Variance  0.00358  0.00352  0.00336  0.00332  0.00327  0.00321
## Cumulative Proportion   0.61585  0.61936  0.62273  0.62604  0.62931  0.63252
##                            PC43     PC44     PC45     PC46     PC47     PC48
## Standard deviation     11.14433 11.02312 10.93664 10.79939 10.59888 10.57217
## Proportion of Variance  0.00318  0.00311  0.00307  0.00299  0.00288  0.00287
## Cumulative Proportion   0.63570  0.63882  0.64189  0.64487  0.64775  0.65062
##                            PC49     PC50     PC51     PC52     PC53     PC54
## Standard deviation     10.50339 10.38534 10.35504 10.19542 10.15638 10.05540
## Proportion of Variance  0.00283  0.00276  0.00275  0.00266  0.00264  0.00259
## Cumulative Proportion   0.65345  0.65621  0.65896  0.66163  0.66427  0.66686
##                            PC55    PC56    PC57    PC58    PC59    PC60    PC61
## Standard deviation     10.01562 9.91745 9.85720 9.81393 9.72360 9.65246 9.61267
## Proportion of Variance  0.00257 0.00252 0.00249 0.00247 0.00242 0.00239 0.00237
## Cumulative Proportion   0.66943 0.67196 0.67445 0.67692 0.67934 0.68173 0.68410
##                          PC62    PC63    PC64    PC65   PC66    PC67    PC68
## Standard deviation     9.4759 9.42952 9.37313 9.34285 9.2554 9.17226 9.11433
## Proportion of Variance 0.0023 0.00228 0.00225 0.00224 0.0022 0.00216 0.00213
## Cumulative Proportion  0.6864 0.68868 0.69093 0.69317 0.6954 0.69752 0.69965
##                          PC69    PC70    PC71    PC72    PC73    PC74    PC75
## Standard deviation     9.0541 8.95341 8.93008 8.86919 8.79246 8.74333 8.71550
## Proportion of Variance 0.0021 0.00206 0.00204 0.00202 0.00198 0.00196 0.00195
## Cumulative Proportion  0.7017 0.70381 0.70585 0.70787 0.70985 0.71181 0.71376
##                           PC76    PC77   PC78    PC79    PC80    PC81    PC82
## Standard deviation     8.70154 8.68756 8.6032 8.51945 8.50156 8.49045 8.43648
## Proportion of Variance 0.00194 0.00193 0.0019 0.00186 0.00185 0.00185 0.00182
## Cumulative Proportion  0.71570 0.71763 0.7195 0.72139 0.72324 0.72509 0.72692
##                           PC83    PC84    PC85    PC86    PC87    PC88    PC89
## Standard deviation     8.40262 8.31537 8.27790 8.22888 8.20397 8.16564 8.12088
## Proportion of Variance 0.00181 0.00177 0.00176 0.00174 0.00173 0.00171 0.00169
## Cumulative Proportion  0.72873 0.73050 0.73225 0.73399 0.73572 0.73743 0.73912
##                           PC90    PC91    PC92    PC93    PC94    PC95   PC96
## Standard deviation     8.09440 8.03922 8.02853 7.98425 7.94711 7.91977 7.8962
## Proportion of Variance 0.00168 0.00166 0.00165 0.00163 0.00162 0.00161 0.0016
## Cumulative Proportion  0.74080 0.74245 0.74410 0.74574 0.74736 0.74897 0.7506
##                           PC97    PC98    PC99   PC100   PC101   PC102   PC103
## Standard deviation     7.86426 7.81706 7.80025 7.77617 7.72169 7.69947 7.66627
## Proportion of Variance 0.00159 0.00157 0.00156 0.00155 0.00153 0.00152 0.00151
## Cumulative Proportion  0.75215 0.75372 0.75528 0.75683 0.75835 0.75987 0.76138
##                         PC104   PC105   PC106   PC107   PC108   PC109   PC110
## Standard deviation     7.6425 7.58165 7.57433 7.54302 7.52131 7.50193 7.47106
## Proportion of Variance 0.0015 0.00147 0.00147 0.00146 0.00145 0.00144 0.00143
## Cumulative Proportion  0.7629 0.76435 0.76582 0.76728 0.76873 0.77017 0.77161
##                          PC111   PC112  PC113  PC114   PC115   PC116   PC117
## Standard deviation     7.43719 7.42747 7.3982 7.3817 7.33785 7.30415 7.26904
## Proportion of Variance 0.00142 0.00141 0.0014 0.0014 0.00138 0.00137 0.00135
## Cumulative Proportion  0.77302 0.77444 0.7758 0.7772 0.77862 0.77999 0.78134
##                          PC118   PC119   PC120   PC121   PC122  PC123  PC124
## Standard deviation     7.22758 7.20132 7.17759 7.16990 7.15062 7.1320 7.1124
## Proportion of Variance 0.00134 0.00133 0.00132 0.00132 0.00131 0.0013 0.0013
## Cumulative Proportion  0.78268 0.78401 0.78533 0.78665 0.78796 0.7893 0.7906
##                          PC125   PC126   PC127   PC128   PC129   PC130   PC131
## Standard deviation     7.09317 7.05287 7.04517 6.99216 6.98468 6.93722 6.92417
## Proportion of Variance 0.00129 0.00128 0.00127 0.00125 0.00125 0.00123 0.00123
## Cumulative Proportion  0.79185 0.79312 0.79440 0.79565 0.79690 0.79813 0.79936
##                          PC132   PC133   PC134  PC135   PC136   PC137   PC138
## Standard deviation     6.90766 6.88579 6.88300 6.8354 6.82245 6.81843 6.79475
## Proportion of Variance 0.00122 0.00122 0.00121 0.0012 0.00119 0.00119 0.00118
## Cumulative Proportion  0.80059 0.80180 0.80302 0.8042 0.80541 0.80660 0.80778
##                          PC139   PC140   PC141   PC142   PC143   PC144   PC145
## Standard deviation     6.77205 6.73636 6.72509 6.71766 6.70752 6.68737 6.65455
## Proportion of Variance 0.00118 0.00116 0.00116 0.00116 0.00115 0.00115 0.00114
## Cumulative Proportion  0.80896 0.81012 0.81128 0.81244 0.81359 0.81474 0.81587
##                          PC146   PC147   PC148   PC149  PC150  PC151   PC152
## Standard deviation     6.64803 6.63572 6.61354 6.59938 6.5648 6.5552 6.52631
## Proportion of Variance 0.00113 0.00113 0.00112 0.00112 0.0011 0.0011 0.00109
## Cumulative Proportion  0.81701 0.81813 0.81926 0.82037 0.8215 0.8226 0.82367
##                          PC153   PC154   PC155   PC156   PC157   PC158   PC159
## Standard deviation     6.50796 6.49522 6.48419 6.44951 6.44479 6.42496 6.40994
## Proportion of Variance 0.00109 0.00108 0.00108 0.00107 0.00106 0.00106 0.00105
## Cumulative Proportion  0.82476 0.82584 0.82691 0.82798 0.82905 0.83010 0.83116
##                          PC160   PC161   PC162   PC163   PC164   PC165   PC166
## Standard deviation     6.39299 6.37721 6.36509 6.35203 6.33379 6.32519 6.30938
## Proportion of Variance 0.00105 0.00104 0.00104 0.00103 0.00103 0.00103 0.00102
## Cumulative Proportion  0.83221 0.83325 0.83429 0.83532 0.83635 0.83737 0.83840
##                          PC167   PC168   PC169  PC170  PC171  PC172   PC173
## Standard deviation     6.30059 6.28765 6.28444 6.2606 6.2466 6.2346 6.22388
## Proportion of Variance 0.00102 0.00101 0.00101 0.0010 0.0010 0.0010 0.00099
## Cumulative Proportion  0.83941 0.84043 0.84144 0.8424 0.8434 0.8444 0.84543
##                          PC174   PC175   PC176   PC177   PC178   PC179   PC180
## Standard deviation     6.21188 6.19398 6.16575 6.15652 6.14708 6.13491 6.12347
## Proportion of Variance 0.00099 0.00098 0.00097 0.00097 0.00097 0.00096 0.00096
## Cumulative Proportion  0.84642 0.84741 0.84838 0.84935 0.85032 0.85129 0.85225
##                          PC181   PC182   PC183   PC184   PC185   PC186   PC187
## Standard deviation     6.10496 6.08151 6.07288 6.06381 6.03761 6.02356 6.00820
## Proportion of Variance 0.00096 0.00095 0.00095 0.00094 0.00093 0.00093 0.00093
## Cumulative Proportion  0.85320 0.85415 0.85510 0.85604 0.85697 0.85790 0.85883
##                          PC188   PC189   PC190   PC191   PC192  PC193  PC194
## Standard deviation     6.00471 5.98311 5.97582 5.95417 5.94457 5.9358 5.9202
## Proportion of Variance 0.00092 0.00092 0.00092 0.00091 0.00091 0.0009 0.0009
## Cumulative Proportion  0.85975 0.86067 0.86159 0.86250 0.86340 0.8643 0.8652
##                         PC195   PC196   PC197   PC198   PC199   PC200   PC201
## Standard deviation     5.9159 5.90440 5.88657 5.87824 5.86614 5.85466 5.83587
## Proportion of Variance 0.0009 0.00089 0.00089 0.00089 0.00088 0.00088 0.00087
## Cumulative Proportion  0.8661 0.86699 0.86788 0.86877 0.86965 0.87053 0.87140
##                          PC202   PC203   PC204   PC205   PC206   PC207   PC208
## Standard deviation     5.82685 5.80908 5.80554 5.79277 5.78815 5.77404 5.76327
## Proportion of Variance 0.00087 0.00087 0.00086 0.00086 0.00086 0.00085 0.00085
## Cumulative Proportion  0.87227 0.87314 0.87400 0.87486 0.87572 0.87657 0.87743
##                          PC209   PC210   PC211   PC212   PC213   PC214   PC215
## Standard deviation     5.74654 5.73250 5.72419 5.72130 5.70775 5.69691 5.68702
## Proportion of Variance 0.00085 0.00084 0.00084 0.00084 0.00084 0.00083 0.00083
## Cumulative Proportion  0.87827 0.87912 0.87996 0.88079 0.88163 0.88246 0.88329
##                          PC216   PC217   PC218   PC219   PC220   PC221  PC222
## Standard deviation     5.68003 5.66410 5.65935 5.64917 5.62344 5.60512 5.6025
## Proportion of Variance 0.00083 0.00082 0.00082 0.00082 0.00081 0.00081 0.0008
## Cumulative Proportion  0.88412 0.88494 0.88576 0.88658 0.88739 0.88820 0.8890
##                         PC223  PC224   PC225   PC226   PC227   PC228   PC229
## Standard deviation     5.5867 5.5775 5.56768 5.56228 5.54801 5.53775 5.52942
## Proportion of Variance 0.0008 0.0008 0.00079 0.00079 0.00079 0.00079 0.00078
## Cumulative Proportion  0.8898 0.8906 0.89139 0.89219 0.89297 0.89376 0.89454
##                          PC230   PC231   PC232   PC233   PC234   PC235   PC236
## Standard deviation     5.52205 5.51287 5.50623 5.49011 5.48673 5.47425 5.45998
## Proportion of Variance 0.00078 0.00078 0.00078 0.00077 0.00077 0.00077 0.00076
## Cumulative Proportion  0.89533 0.89611 0.89688 0.89766 0.89843 0.89920 0.89996
##                          PC237   PC238   PC239   PC240   PC241   PC242   PC243
## Standard deviation     5.45317 5.43706 5.42950 5.42328 5.40189 5.37561 5.36945
## Proportion of Variance 0.00076 0.00076 0.00076 0.00075 0.00075 0.00074 0.00074
## Cumulative Proportion  0.90072 0.90148 0.90224 0.90299 0.90374 0.90448 0.90522
##                          PC244   PC245   PC246   PC247   PC248   PC249   PC250
## Standard deviation     5.36577 5.35541 5.33641 5.32938 5.32848 5.31860 5.31274
## Proportion of Variance 0.00074 0.00074 0.00073 0.00073 0.00073 0.00073 0.00072
## Cumulative Proportion  0.90596 0.90669 0.90742 0.90815 0.90888 0.90960 0.91033
##                          PC251   PC252   PC253   PC254   PC255   PC256  PC257
## Standard deviation     5.30960 5.30037 5.28031 5.26938 5.25997 5.24633 5.2420
## Proportion of Variance 0.00072 0.00072 0.00071 0.00071 0.00071 0.00071 0.0007
## Cumulative Proportion  0.91105 0.91177 0.91248 0.91319 0.91390 0.91461 0.9153
##                         PC258  PC259   PC260   PC261   PC262   PC263   PC264
## Standard deviation     5.2302 5.2151 5.20514 5.19917 5.18928 5.18299 5.17808
## Proportion of Variance 0.0007 0.0007 0.00069 0.00069 0.00069 0.00069 0.00069
## Cumulative Proportion  0.9160 0.9167 0.91741 0.91810 0.91879 0.91948 0.92017
##                          PC265   PC266   PC267   PC268   PC269   PC270   PC271
## Standard deviation     5.16887 5.14921 5.14114 5.13361 5.12148 5.11617 5.10591
## Proportion of Variance 0.00068 0.00068 0.00068 0.00068 0.00067 0.00067 0.00067
## Cumulative Proportion  0.92085 0.92153 0.92221 0.92288 0.92356 0.92423 0.92490
##                          PC272   PC273   PC274   PC275   PC276   PC277   PC278
## Standard deviation     5.09532 5.08570 5.07254 5.05696 5.04961 5.04434 5.03740
## Proportion of Variance 0.00067 0.00066 0.00066 0.00066 0.00065 0.00065 0.00065
## Cumulative Proportion  0.92556 0.92622 0.92688 0.92754 0.92819 0.92885 0.92950
##                          PC279   PC280   PC281   PC282   PC283   PC284   PC285
## Standard deviation     5.03214 5.02372 5.02127 5.00657 4.99789 4.99025 4.98303
## Proportion of Variance 0.00065 0.00065 0.00065 0.00064 0.00064 0.00064 0.00064
## Cumulative Proportion  0.93015 0.93079 0.93144 0.93208 0.93272 0.93336 0.93400
##                          PC286   PC287   PC288   PC289   PC290   PC291   PC292
## Standard deviation     4.97874 4.96314 4.95727 4.94397 4.93021 4.92624 4.91213
## Proportion of Variance 0.00064 0.00063 0.00063 0.00063 0.00062 0.00062 0.00062
## Cumulative Proportion  0.93463 0.93526 0.93589 0.93652 0.93714 0.93777 0.93838
##                          PC293   PC294   PC295   PC296   PC297   PC298  PC299
## Standard deviation     4.90909 4.90160 4.89691 4.87850 4.87178 4.86361 4.8509
## Proportion of Variance 0.00062 0.00062 0.00061 0.00061 0.00061 0.00061 0.0006
## Cumulative Proportion  0.93900 0.93962 0.94023 0.94084 0.94145 0.94206 0.9427
##                         PC300  PC301  PC302  PC303   PC304   PC305   PC306
## Standard deviation     4.8386 4.8359 4.8288 4.8185 4.80154 4.79106 4.78740
## Proportion of Variance 0.0006 0.0006 0.0006 0.0006 0.00059 0.00059 0.00059
## Cumulative Proportion  0.9433 0.9439 0.9445 0.9450 0.94564 0.94623 0.94682
##                          PC307   PC308   PC309   PC310   PC311   PC312   PC313
## Standard deviation     4.77642 4.77353 4.76223 4.75264 4.74697 4.74254 4.73571
## Proportion of Variance 0.00058 0.00058 0.00058 0.00058 0.00058 0.00058 0.00057
## Cumulative Proportion  0.94740 0.94799 0.94857 0.94915 0.94973 0.95030 0.95088
##                          PC314   PC315   PC316   PC317   PC318   PC319   PC320
## Standard deviation     4.72373 4.71295 4.70918 4.70470 4.68482 4.66986 4.66016
## Proportion of Variance 0.00057 0.00057 0.00057 0.00057 0.00056 0.00056 0.00056
## Cumulative Proportion  0.95145 0.95202 0.95259 0.95316 0.95372 0.95428 0.95483
##                          PC321   PC322   PC323   PC324   PC325   PC326   PC327
## Standard deviation     4.65292 4.64273 4.63501 4.62715 4.61816 4.61295 4.60138
## Proportion of Variance 0.00055 0.00055 0.00055 0.00055 0.00055 0.00055 0.00054
## Cumulative Proportion  0.95539 0.95594 0.95649 0.95704 0.95759 0.95813 0.95868
##                          PC328   PC329   PC330   PC331   PC332   PC333   PC334
## Standard deviation     4.59559 4.58464 4.57895 4.56647 4.55779 4.55681 4.54824
## Proportion of Variance 0.00054 0.00054 0.00054 0.00053 0.00053 0.00053 0.00053
## Cumulative Proportion  0.95922 0.95976 0.96029 0.96083 0.96136 0.96189 0.96242
##                          PC335   PC336   PC337   PC338   PC339   PC340   PC341
## Standard deviation     4.52950 4.52450 4.52059 4.50711 4.49866 4.48588 4.48235
## Proportion of Variance 0.00053 0.00052 0.00052 0.00052 0.00052 0.00052 0.00052
## Cumulative Proportion  0.96295 0.96347 0.96400 0.96452 0.96504 0.96555 0.96607
##                          PC342   PC343   PC344   PC345  PC346  PC347  PC348
## Standard deviation     4.46636 4.46061 4.45713 4.44465 4.4262 4.4189 4.4026
## Proportion of Variance 0.00051 0.00051 0.00051 0.00051 0.0005 0.0005 0.0005
## Cumulative Proportion  0.96658 0.96709 0.96760 0.96811 0.9686 0.9691 0.9696
##                         PC349   PC350   PC351   PC352   PC353   PC354   PC355
## Standard deviation     4.3978 4.37642 4.37232 4.35889 4.34876 4.34542 4.32851
## Proportion of Variance 0.0005 0.00049 0.00049 0.00049 0.00048 0.00048 0.00048
## Cumulative Proportion  0.9701 0.97059 0.97108 0.97157 0.97205 0.97254 0.97302
##                          PC356   PC357   PC358   PC359   PC360   PC361   PC362
## Standard deviation     4.32059 4.30308 4.29827 4.29106 4.28856 4.27829 4.26249
## Proportion of Variance 0.00048 0.00047 0.00047 0.00047 0.00047 0.00047 0.00047
## Cumulative Proportion  0.97350 0.97397 0.97445 0.97492 0.97539 0.97586 0.97632
##                          PC363   PC364   PC365   PC366   PC367   PC368   PC369
## Standard deviation     4.25341 4.23846 4.23659 4.23118 4.21863 4.20706 4.19351
## Proportion of Variance 0.00046 0.00046 0.00046 0.00046 0.00046 0.00045 0.00045
## Cumulative Proportion  0.97679 0.97725 0.97771 0.97817 0.97862 0.97908 0.97953
##                          PC370   PC371   PC372   PC373   PC374   PC375   PC376
## Standard deviation     4.18810 4.17791 4.17259 4.15299 4.14799 4.13346 4.11874
## Proportion of Variance 0.00045 0.00045 0.00045 0.00044 0.00044 0.00044 0.00043
## Cumulative Proportion  0.97998 0.98043 0.98087 0.98131 0.98176 0.98219 0.98263
##                          PC377   PC378   PC379   PC380   PC381   PC382   PC383
## Standard deviation     4.11160 4.10540 4.08205 4.07504 4.06574 4.04579 4.04418
## Proportion of Variance 0.00043 0.00043 0.00043 0.00043 0.00042 0.00042 0.00042
## Cumulative Proportion  0.98306 0.98349 0.98392 0.98435 0.98477 0.98519 0.98561
##                          PC384   PC385   PC386   PC387   PC388  PC389  PC390
## Standard deviation     4.02632 4.01997 4.00707 4.00413 3.99444 3.9718 3.9617
## Proportion of Variance 0.00042 0.00041 0.00041 0.00041 0.00041 0.0004 0.0004
## Cumulative Proportion  0.98602 0.98644 0.98685 0.98726 0.98767 0.9881 0.9885
##                         PC391  PC392   PC393   PC394   PC395   PC396   PC397
## Standard deviation     3.9486 3.9340 3.92043 3.91862 3.90287 3.89173 3.87552
## Proportion of Variance 0.0004 0.0004 0.00039 0.00039 0.00039 0.00039 0.00039
## Cumulative Proportion  0.9889 0.9893 0.98967 0.99006 0.99045 0.99084 0.99123
##                          PC398   PC399   PC400   PC401   PC402   PC403   PC404
## Standard deviation     3.86324 3.85069 3.83834 3.82786 3.80718 3.80150 3.78391
## Proportion of Variance 0.00038 0.00038 0.00038 0.00038 0.00037 0.00037 0.00037
## Cumulative Proportion  0.99161 0.99199 0.99237 0.99274 0.99311 0.99348 0.99385
##                          PC405   PC406   PC407   PC408   PC409   PC410   PC411
## Standard deviation     3.75824 3.73651 3.73300 3.72056 3.70522 3.69386 3.67066
## Proportion of Variance 0.00036 0.00036 0.00036 0.00035 0.00035 0.00035 0.00035
## Cumulative Proportion  0.99421 0.99457 0.99493 0.99528 0.99563 0.99598 0.99633
##                          PC412   PC413   PC414   PC415   PC416   PC417   PC418
## Standard deviation     3.63332 3.62893 3.58688 3.55919 3.53632 3.50618 3.46783
## Proportion of Variance 0.00034 0.00034 0.00033 0.00032 0.00032 0.00032 0.00031
## Cumulative Proportion  0.99667 0.99701 0.99734 0.99766 0.99798 0.99830 0.99860
##                          PC419   PC420   PC421   PC422   PC423     PC424
## Standard deviation     3.45167 3.36417 3.32868 3.18659 3.16115 1.467e-13
## Proportion of Variance 0.00031 0.00029 0.00028 0.00026 0.00026 0.000e+00
## Cumulative Proportion  0.99891 0.99920 0.99948 0.99974 1.00000 1.000e+00
# LIHC PCA (PC1 vs PC2) coloured by sample type: "Solid Tissue Normal"
# samples are black, everything else is red.
# Sample annotation: see recount3_rse_LIHC@colData.
is_solid_normal <-
  lihc_tcga_metadata_order$cgc_sample_sample_type == "Solid Tissue Normal"
nt <- lihc_tcga_metadata_order$tcga_barcode[is_solid_normal]

in_normal_set <- lihc_tcga_metadata_order$tcga_barcode %in% nt
tumor_norm <- ifelse(in_normal_set, "black", "red")
plot(
  x = pca.tumor$x[, 1],
  y = pca.tumor$x[, 2],
  pch = 20,
  col = tumor_norm,
  main = "PCA of LIHC",
  xlab = "PC1 (11.24%)",
  ylab = "PC2 (9.38%)",
  cex.axis = "1.5",
  cex.lab = "1.5"
)
legend(
  "topleft",
  legend = c("tumor", "normal"),
  pch = 21,
  pt.bg = c("red", "black"),
  col = "black"
)

# LIHC PCA (PC1 vs PC2) coloured by age, using xml_days_to_birth.
# Sample annotation: see recount3_rse_LIHC@colData.
# Samples with xml_days_to_birth below -20000 are collected in `nt` and drawn
# in black; all other samples (including those with a missing value) are red.
# NOTE(review): the cutoff implies negative day counts (days back to birth),
# so "< -20000" would be the older patients -- confirm against the metadata.
nt <- lihc_tcga_metadata_order$tcga_barcode[
  lihc_tcga_metadata_order$xml_days_to_birth < -20000
]

tumor_norm <- ifelse(
  lihc_tcga_metadata_order$tcga_barcode %in% nt, "black", "red"
)
plot(
  pca.tumor$x[, 1], pca.tumor$x[, 2],
  pch = 20, col = tumor_norm,
  main = "PCA of LIHC",
  xlab = "PC1 (11.24%)", ylab = "PC2 (9.38%)",
  cex.axis = 1.5, cex.lab = 1.5
)
# Legend entries follow pt.bg order: red = not in `nt`, black = in `nt`.
# The previous legend showed the same text twice and quoted -2000 instead of
# the -20000 cutoff actually used above.
legend(
  "topleft",
  legend = c(
    ">= -20000 days to birth (or missing)",
    "< -20000 days to birth"
  ),
  pch = 21, pt.bg = c("red", "black"), col = "black"
)

Lung cancer: import the GTEx lung data

# Load the GTEx lung counts and sample metadata, bring both into the same
# sample order, label the count columns with the metadata ids, and save the
# ordered objects. Lines starting with "##" are console output from knitting.
lung_gtex_counts <- readRDS("~/data/recount3/recount3_fix_download/lung_gtex_counts.rds")
lung_gtex_metadata <- readRDS("~/data/recount3/recount3_fix_download/lung_gtex_metadata.rds")
# Peek at both id vectors: the count column names use "." as separator and
# are in a different order than the metadata ids (which use "-").
colnames(lung_gtex_counts)[1:5]
## [1] "GTEX.PLZ5.0726.SM.2I5F9.1"  "GTEX.12WSN.0626.SM.5BC61.1"
## [3] "GTEX.11TT1.1626.SM.5EQL7.1" "GTEX.PX3G.0526.SM.2I3EM.1" 
## [5] "GTEX.11P7K.0326.SM.59871.1"
lung_gtex_metadata$external_id[1:5]
## [1] "GTEX-111CU-0326-SM-5GZXO.1" "GTEX-111FC-1126-SM-5GZWU.1"
## [3] "GTEX-111VG-0726-SM-5GIDC.1" "GTEX-111YS-0626-SM-5GZXV.1"
## [5] "GTEX-1122O-0126-SM-5GICA.1"
# Normalise both id vectors by turning every punctuation character into "-"
# so they can be compared and sorted on a common key.
ids<- str_replace_all(colnames(lung_gtex_counts), "[[:punct:]]", "-") 
meta_data_ids<- str_replace_all(lung_gtex_metadata$external_id, "[[:punct:]]", "-")
# Sanity check: after sorting, counts and metadata hold the same ids.
identical(ids[order(ids)], meta_data_ids[order(meta_data_ids)])
## [1] TRUE
# Sort count columns and metadata rows by the normalised id so that column i
# of the counts corresponds to row i of the metadata.
lung_gtex_counts_order <- lung_gtex_counts[,order(ids)]
lung_gtex_metadata_order<- lung_gtex_metadata[order(meta_data_ids), ]
# Safe only because of the identical() check above: relabel the count columns
# with the original (un-normalised) metadata ids.
colnames(lung_gtex_counts_order )<- lung_gtex_metadata_order$external_id
recount3_count_lung <- as.data.frame(lung_gtex_counts_order)
# Persist the ordered counts and metadata.
saveRDS(recount3_count_lung,  "/home/rstudio/data/lung_gtex_count_ordered.rds")
saveRDS(lung_gtex_metadata_order,  "/home/rstudio/data/lung_gtex_metadata_ordered.rds")
# Alias used as the input of the VST/PCA step below.
counts_lung <- recount3_count_lung

# Variance-stabilise the GTEx lung counts, then run a PCA across samples.
counts_matrix <- as.matrix(counts_lung)
vst_table <- vst(counts_matrix)

# Transpose so that each row handed to prcomp() is one column (sample) of the
# count table.
vst_table_df <- t(vst_table)
pca.tumor <- prcomp(vst_table_df)

# Importance table (sd / proportion / cumulative) for the first ten PCs.
x <- summary(pca.tumor)
y <- x[["importance"]]
y[, seq_len(10)]
##                             PC1      PC2      PC3      PC4      PC5      PC6
## Standard deviation     48.81666 36.70425 32.96865 28.25734 23.56789 22.67199
## Proportion of Variance  0.14496  0.08195  0.06612  0.04857  0.03379  0.03127
## Cumulative Proportion   0.14496  0.22690  0.29302  0.34159  0.37537  0.40664
##                             PC7      PC8      PC9     PC10
## Standard deviation     20.30030 18.74630 17.71035 16.30350
## Proportion of Variance  0.02507  0.02138  0.01908  0.01617
## Cumulative Proportion   0.43171  0.45308  0.47216  0.48833
# GTEx lung PCA (PC1 vs PC2): samples whose SEX code is "2" are drawn in red
# (legend entry "female"), every other sample in black.
sex_is_2 <- lung_gtex_metadata_order$SEX == "2"
sex <- lung_gtex_metadata_order$external_id[sex_is_2]
# A missing SEX value produces an NA id in the subset; drop those.
sex <- sex[!is.na(sex)]

in_sex_group <- lung_gtex_metadata_order$external_id %in% sex
tumor_norm <- ifelse(in_sex_group, "red", "black")
plot(
  x = pca.tumor$x[, 1],
  y = pca.tumor$x[, 2],
  pch = 20,
  col = tumor_norm,
  main = "PCA of GTEx Lung",
  xlab = "PC1 (14.496%)",
  ylab = "PC2 (8.195%)",
  cex.axis = "1.5",
  cex.lab = "1.5"
)
legend(
  "topleft",
  legend = c("female", "male/normal"),
  pch = 21,
  pt.bg = c("red", "black"),
  col = "black"
)

# GTEx lung PCA (PC1 vs PC2): samples in the "70-79" AGE bracket are drawn in
# red, every other sample in black.
age <- lung_gtex_metadata_order$external_id[
  lung_gtex_metadata_order$AGE == "70-79"
]
# Drop NA ids produced by missing AGE values. The previous code filtered
# `sex` here (sex[!is.na(age)]), which replaced the age selection with an
# unrelated subset of the female sample ids.
age <- age[!is.na(age)]

tumor_norm <- ifelse(
  lung_gtex_metadata_order$external_id %in% age, "red", "black"
)
plot(
  pca.tumor$x[, 1], pca.tumor$x[, 2],
  pch = 20, col = tumor_norm,
  main = "PCA of GTEx Lung",
  xlab = "PC1 (14.496%)", ylab = "PC2 (8.195%)",
  cex.axis = 1.5, cex.lab = 1.5
)
legend(
  "topleft",
  legend = c("70-79", "!70-79"),
  pch = 21, pt.bg = c("red", "black"), col = "black"
)

# GTEx lung PCA (PC1 vs PC2): samples in the "20-29" AGE bracket are red,
# all remaining samples black.
age_is_20_29 <- lung_gtex_metadata_order$AGE == "20-29"
age <- lung_gtex_metadata_order$external_id[age_is_20_29]
# Missing AGE values give NA ids; remove them before matching.
age <- age[!is.na(age)]

in_age_group <- lung_gtex_metadata_order$external_id %in% age
tumor_norm <- ifelse(in_age_group, "red", "black")
plot(
  x = pca.tumor$x[, 1],
  y = pca.tumor$x[, 2],
  pch = 20,
  col = tumor_norm,
  main = "PCA of GTEx Lung",
  xlab = "PC1 (14.496%)",
  ylab = "PC2 (8.195%)",
  cex.axis = "1.5",
  cex.lab = "1.5"
)
legend(
  "topleft",
  legend = c("20-29", "! 20-29"),
  pch = 21,
  pt.bg = c("red", "black"),
  col = "black"
)

# GTEx lung PCA (PC1 vs PC2): samples with SMRIN of at least 7 are red,
# all remaining samples (lower or missing SMRIN) black.
rin_at_least_7 <- lung_gtex_metadata_order$SMRIN >= 7
rin <- lung_gtex_metadata_order$external_id[rin_at_least_7]
# Missing SMRIN values give NA ids; remove them before matching.
rin <- rin[!is.na(rin)]

in_rin_group <- lung_gtex_metadata_order$external_id %in% rin
tumor_norm <- ifelse(in_rin_group, "red", "black")
plot(
  x = pca.tumor$x[, 1],
  y = pca.tumor$x[, 2],
  pch = 20,
  col = tumor_norm,
  main = "PCA of GTEx Lung",
  xlab = "PC1 (14.496%)",
  ylab = "PC2 (8.195%)",
  cex.axis = "1.5",
  cex.lab = "1.5"
)
legend(
  "topleft",
  legend = c(">= 7", "< 7"),
  pch = 21,
  pt.bg = c("red", "black"),
  col = "black"
)

SMTSISCH (GTEx total ischemic time for the sample)

# GTEx lung PCA (PC1 vs PC2): samples with SMTSISCH of at least 500 are red,
# all remaining samples (lower or missing SMTSISCH) black.
time_at_least_500 <- lung_gtex_metadata_order$SMTSISCH >= 500
time <- lung_gtex_metadata_order$external_id[time_at_least_500]
# Missing SMTSISCH values give NA ids; remove them before matching.
time <- time[!is.na(time)]

in_time_group <- lung_gtex_metadata_order$external_id %in% time
tumor_norm <- ifelse(in_time_group, "red", "black")
plot(
  x = pca.tumor$x[, 1],
  y = pca.tumor$x[, 2],
  pch = 20,
  col = tumor_norm,
  main = "PCA of GTEx Lung",
  xlab = "PC1 (14.496%)",
  ylab = "PC2 (8.195%)",
  cex.axis = "1.5",
  cex.lab = "1.5"
)
legend(
  "bottomleft",
  legend = c(">= 500", "< 500"),
  pch = 21,
  pt.bg = c("red", "black"),
  col = "black"
)

LUAD

# Read the TCGA LUAD count table and its sample metadata from the recount3
# download directory.
luad_tcga_counts <- readRDS(
  file = "~/data/recount3/recount3_fix_download/luad_tcga_counts.rds"
)
luad_tcga_metadata <- readRDS(
  file = "~/data/recount3/recount3_fix_download/luad_tcga_metadata.rds"
)

check counts and metadata info

# Check that counts and metadata describe the same samples, bring both into
# the same order, relabel the count columns with TCGA barcodes, and save.
# Lines starting with "##" are console output from knitting.
# Counts have one column per sample, metadata one row per sample (601 each).
dim(luad_tcga_counts)
## [1] 63856   601
dim(luad_tcga_metadata)
## [1] 601 840
# Length of the first column name; used as the width passed to substrRight().
nchar(colnames(luad_tcga_counts)[1])
## [1] 36
# substrRight() is defined outside this section -- presumably it keeps the
# last n characters of each string; confirm against its definition.
ids<- substrRight(colnames(luad_tcga_counts), 36)
# Turn every punctuation character into "-" so the ids can be compared with
# the metadata external_id values.
ids<- str_replace_all(ids, "[[:punct:]]", "-")
colnames(luad_tcga_counts) <- ids
# Sort count columns by id and metadata rows by external_id.
luad_tcga_counts_order <- luad_tcga_counts[,order(ids)]
luad_tcga_metadata_order<- luad_tcga_metadata[order(luad_tcga_metadata$external_id), ]
# Sanity check: column i of the counts now matches row i of the metadata.
identical(colnames(luad_tcga_counts_order), luad_tcga_metadata_order$external_id)
## [1] TRUE
# Safe only because of the identical() check above: use the TCGA barcode as
# the sample label from here on.
colnames(luad_tcga_counts_order)<- luad_tcga_metadata_order$tcga_barcode
# Persist the ordered counts and metadata.
saveRDS(luad_tcga_counts_order,  "/home/rstudio/data/luad_tcga_count_ordered.rds")
saveRDS(luad_tcga_metadata_order,  "/home/rstudio/data/luad_tcga_metadata_ordered.rds")
# Variance-stabilise the ordered LUAD counts, then run a PCA across samples.
luad_counts_matrix <- as.matrix(luad_tcga_counts_order)
vst_table <- vst(luad_counts_matrix)

# Transpose so that each row handed to prcomp() is one column (sample) of the
# count table.
vst_table_df <- t(vst_table)
pca.tumor <- prcomp(vst_table_df)

# Importance table (sd / proportion / cumulative) for the first ten PCs.
x <- summary(pca.tumor)
y <- x[["importance"]]
y[, seq_len(10)]
##                             PC1      PC2      PC3      PC4      PC5      PC6
## Standard deviation     57.84925 52.53246 38.85727 34.60496 32.78108 29.92140
## Proportion of Variance  0.11415  0.09414  0.05150  0.04085  0.03666  0.03054
## Cumulative Proportion   0.11415  0.20829  0.25979  0.30064  0.33730  0.36784
##                             PC7      PC8      PC9     PC10
## Standard deviation     28.22511 26.33350 25.44943 21.65315
## Proportion of Variance  0.02717  0.02365  0.02209  0.01599
## Cumulative Proportion   0.39501  0.41867  0.44076  0.45675
# LUAD PCA (PC1 vs PC2) coloured by sample type: "Solid Tissue Normal"
# samples are black, everything else is red.
is_solid_normal <-
  luad_tcga_metadata_order$cgc_sample_sample_type == "Solid Tissue Normal"
nt <- luad_tcga_metadata_order$tcga_barcode[is_solid_normal]

in_normal_set <- luad_tcga_metadata_order$tcga_barcode %in% nt
tumor_norm <- ifelse(in_normal_set, "black", "red")
plot(
  x = pca.tumor$x[, 1],
  y = pca.tumor$x[, 2],
  pch = 20,
  col = tumor_norm,
  main = "PCA of LUAD",
  xlab = "PC1 (11.42%)",
  ylab = "PC2 (9.414%)",
  cex.axis = "1.5",
  cex.lab = "1.5"
)
legend(
  "topleft",
  legend = c("tumor", "normal"),
  pch = 21,
  pt.bg = c("red", "black"),
  col = "black"
)

# LUAD PCA (PC1 vs PC2) coloured by cgc_case_gender: "FEMALE" samples are
# black, all other samples (including missing gender) are red.
sex <- luad_tcga_metadata_order$tcga_barcode[
  luad_tcga_metadata_order$cgc_case_gender == "FEMALE"
]

tumor_norm <- ifelse(
  luad_tcga_metadata_order$tcga_barcode %in% sex, "black", "red"
)
# Axis labels use the LUAD variance proportions printed by the PCA summary
# (PC1 0.11415, PC2 0.09414); the previous labels were the LIHC values.
plot(
  pca.tumor$x[, 1], pca.tumor$x[, 2],
  pch = 20, col = tumor_norm,
  main = "PCA of LUAD",
  xlab = "PC1 (11.42%)", ylab = "PC2 (9.414%)",
  cex.axis = 1.5, cex.lab = 1.5
)
# Legend entries follow pt.bg order: red = not "FEMALE", black = "FEMALE".
legend(
  "topleft",
  legend = c("male", "female"),
  pch = 21, pt.bg = c("red", "black"), col = "black"
)

# LUAD PCA (PC1 vs PC2) coloured by age, using xml_days_to_birth.
# Samples with xml_days_to_birth below -20000 are collected in `age` and drawn
# in black; all other samples (including those with a missing value) are red.
# NOTE(review): the cutoff implies negative day counts (days back to birth),
# so "< -20000" would be the older patients -- confirm against the metadata.
age <- luad_tcga_metadata_order$tcga_barcode[
  luad_tcga_metadata_order$xml_days_to_birth < -20000
]

tumor_norm <- ifelse(
  luad_tcga_metadata_order$tcga_barcode %in% age, "black", "red"
)
# Axis labels use the LUAD variance proportions printed by the PCA summary
# (PC1 0.11415, PC2 0.09414); the previous labels were the LIHC values.
plot(
  pca.tumor$x[, 1], pca.tumor$x[, 2],
  pch = 20, col = tumor_norm,
  main = "PCA of LUAD",
  xlab = "PC1 (11.42%)", ylab = "PC2 (9.414%)",
  cex.axis = 1.5, cex.lab = 1.5
)
# Legend entries follow pt.bg order: red = not in `age`, black = in `age`.
# The previous legend showed the same text twice and quoted -2000 instead of
# the -20000 cutoff actually used above.
legend(
  "topleft",
  legend = c(
    ">= -20000 days to birth (or missing)",
    "< -20000 days to birth"
  ),
  pch = 21, pt.bg = c("red", "black"), col = "black"
)

# Pull the metadata rows of the LUAD samples that score above 50 on PC2, to
# look for a covariate that explains the separation along that component.
# The sample labels are the row names of the PCA score matrix. The vector was
# previously called `list`, which shadowed base::list().
pc2_scores <- pca.tumor$x[, 2]
pc2_outlier_barcodes <- names(pc2_scores)[pc2_scores > 50]

metadata <- as.data.frame(luad_tcga_metadata_order)
is_pc2_outlier <-
  luad_tcga_metadata_order$tcga_barcode %in% pc2_outlier_barcodes
metadata_test <- metadata[is_pc2_outlier, ]

I don't see anything in the metadata that explains the separation along PC2. ### Save Data ### Save Figures

END

Location of final scripts:
/scripts 

Location of data produced:
na

Dates when operations were done:
220524

Versions

# Record the R version, platform and package versions used for this analysis.
sessionInfo()
## R version 4.1.3 (2022-03-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.8.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] DESeq2_1.34.0               stringr_1.4.1              
##  [3] recount3_1.4.0              SummarizedExperiment_1.24.0
##  [5] Biobase_2.54.0              GenomicRanges_1.46.1       
##  [7] GenomeInfoDb_1.30.1         IRanges_2.28.0             
##  [9] S4Vectors_0.32.4            BiocGenerics_0.40.0        
## [11] MatrixGenerics_1.6.0        matrixStats_0.62.0         
## 
## loaded via a namespace (and not attached):
##  [1] bitops_1.0-7             bit64_4.0.5              filelock_1.0.2          
##  [4] RColorBrewer_1.1-3       httr_1.4.4               tools_4.1.3             
##  [7] bslib_0.4.0              utf8_1.2.2               R6_2.5.1                
## [10] colorspace_2.0-3         DBI_1.1.3                tidyselect_1.1.2        
## [13] bit_4.0.4                curl_4.3.2               compiler_4.1.3          
## [16] cli_3.4.1                DelayedArray_0.20.0      rtracklayer_1.54.0      
## [19] sass_0.4.2               scales_1.2.1             genefilter_1.76.0       
## [22] rappdirs_0.3.3           digest_0.6.29            Rsamtools_2.10.0        
## [25] rmarkdown_2.16           R.utils_2.12.0           XVector_0.34.0          
## [28] pkgconfig_2.0.3          htmltools_0.5.3          sessioninfo_1.2.2       
## [31] highr_0.9                dbplyr_2.2.1             fastmap_1.1.0           
## [34] rlang_1.0.6              rstudioapi_0.13          RSQLite_2.2.17          
## [37] jquerylib_0.1.4          BiocIO_1.4.0             generics_0.1.3          
## [40] jsonlite_1.8.0           BiocParallel_1.28.3      dplyr_1.0.10            
## [43] R.oo_1.25.0              RCurl_1.98-1.8           magrittr_2.0.3          
## [46] GenomeInfoDbData_1.2.7   Matrix_1.5-1             Rcpp_1.0.9              
## [49] munsell_0.5.0            fansi_1.0.3              lifecycle_1.0.2         
## [52] R.methodsS3_1.8.2        stringi_1.7.8            yaml_2.3.5              
## [55] zlibbioc_1.40.0          BiocFileCache_2.2.1      grid_4.1.3              
## [58] blob_1.2.3               parallel_4.1.3           crayon_1.5.2            
## [61] lattice_0.20-45          Biostrings_2.62.0        splines_4.1.3           
## [64] annotate_1.72.0          KEGGREST_1.34.0          locfit_1.5-9.6          
## [67] knitr_1.40               pillar_1.8.1             rjson_0.2.21            
## [70] geneplotter_1.72.0       XML_3.99-0.10            glue_1.6.2              
## [73] evaluate_0.16            data.table_1.14.2        vctrs_0.4.2             
## [76] png_0.1-7                gtable_0.3.1             purrr_0.3.4             
## [79] assertthat_0.2.1         cachem_1.0.6             ggplot2_3.3.6           
## [82] xfun_0.33                xtable_1.8-4             restfulr_0.0.15         
## [85] survival_3.3-1           tibble_3.1.8             GenomicAlignments_1.30.0
## [88] AnnotationDbi_1.56.2     memoise_2.0.1